diff options
| field | value | date |
|---|---|---|
| author | Laurent Bercot <ska-skaware@skarnet.org> | 2025-10-09 17:14:56 +0000 |
| committer | Laurent Bercot <ska-skaware@skarnet.org> | 2025-10-09 17:14:56 +0000 |
| commit | 46f4d80c4c40559a383d001838d766b2b438f4f0 (patch) | |
| tree | a1471d419df12d24edea840df64da269a656ca33 /src | |
| parent | eb56bacc8f5fff95cb752a7f8b3aefe8ed3f3d9a (diff) | |
| download | tipidee-46f4d80c4c40559a383d001838d766b2b438f4f0.tar.gz | |
Move string cmp operations to skalibs. Add cgit-nollmcrawler.
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/config/deps-exe/tipidee-config | 1 |
| -rw-r--r-- | src/config/tipidee-config-internal.h | 7 |
| -rw-r--r-- | src/config/util.c | 15 |
| -rw-r--r-- | src/misc/cgit-nollmcrawler.c | 140 |
| -rw-r--r-- | src/misc/deps-exe/cgit-nollmcrawler | 1 |
| -rw-r--r-- | src/misc/tipidee-logaggregate.c | 17 |
| -rw-r--r-- | src/tipideed/cgi.c | 2 |
| -rw-r--r-- | src/tipideed/stream.c | 4 |

8 files changed, 154 insertions, 33 deletions
diff --git a/src/config/deps-exe/tipidee-config b/src/config/deps-exe/tipidee-config index 2e844d8..55cf760 100644 --- a/src/config/deps-exe/tipidee-config +++ b/src/config/deps-exe/tipidee-config @@ -1,4 +1,3 @@ -util.o node.o repo.o conftree.o diff --git a/src/config/tipidee-config-internal.h b/src/config/tipidee-config-internal.h index d3977c0..c9849f2 100644 --- a/src/config/tipidee-config-internal.h +++ b/src/config/tipidee-config-internal.h @@ -7,6 +7,7 @@ #include <string.h> #include <stdlib.h> +#include <skalibs/bytestr.h> #include <skalibs/buffer.h> #include <skalibs/strerr.h> #include <skalibs/stralloc.h> @@ -45,11 +46,7 @@ struct global_s extern struct global_s g ; - - /* util */ - -extern int keycmp (void const *, void const *) ; /* for any struct starting with a string key */ -#define BSEARCH(type, key, array) bsearch(key, (array), sizeof(array)/sizeof(type), sizeof(type), &keycmp) +#define BSEARCH(type, key, array) bsearch(key, (array), sizeof(array)/sizeof(type), sizeof(type), &stringkey_bcmp) /* node */ diff --git a/src/config/util.c b/src/config/util.c deleted file mode 100644 index bee1503..0000000 --- a/src/config/util.c +++ /dev/null @@ -1,15 +0,0 @@ -/* ISC license. */ - -#include <string.h> - -#include "tipidee-config-internal.h" - -struct starts_with_a_string_key_s -{ - char const *s ; -} ; - -int keycmp (void const *a, void const *b) -{ - return strcmp((char const *)a, ((struct starts_with_a_string_key_s const *)b)->s) ; -} diff --git a/src/misc/cgit-nollmcrawler.c b/src/misc/cgit-nollmcrawler.c new file mode 100644 index 0000000..71d31ae --- /dev/null +++ b/src/misc/cgit-nollmcrawler.c @@ -0,0 +1,140 @@ +/* ISC license. 
*/ + +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <regex.h> + +#include <skalibs/posixplz.h> +#include <skalibs/bytestr.h> +#include <skalibs/stat.h> +#include <skalibs/prog.h> +#include <skalibs/buffer.h> +#include <skalibs/strerr.h> +#include <skalibs/gol.h> +#include <skalibs/fmtscan.h> +#include <skalibs/exec.h> + +#define USAGE "as a CGI script: cgit-nollmcrawler [ -v verbosity ] [ -d pathinfodepth ] rulesdir badregex realcgit..." +#define dieusage() strerr_dieusage(100, USAGE) + +enum golb_e +{ + GOLB_FORCE = 0x01 +} ; + +static gol_bool const rgolb[] = +{ + { .so = 'f', .lo = "force", .clear = 0, .set = GOLB_FORCE } +} ; + +enum gola_e +{ + GOLA_VERBOSITY, + GOLA_DEPTH, + GOLA_N +} ; + +static gol_arg const rgola[] = +{ + { .so = 'v', .lo = "verbosity", .i = GOLA_VERBOSITY }, + { .so = 'd', .lo = "pathinfo-depth", .i = GOLA_DEPTH } +} ; + +int main (int argc, char const *const *argv) +{ + unsigned int verbosity = 1 ; + char const *wgola[GOLA_N] = { 0 } ; + uint64_t wgolb = 0 ; + unsigned int golc ; + unsigned int depth = 1 ; + char const *remoteaddr ; + char const *x ; + size_t rdlen, m = 0 ; + char ip[16] ; + int is6 ; + PROG = "cgit-nollmcrawler" ; + + golc = GOL_main(argc, argv, rgolb, rgola, &wgolb, wgola) ; + argc -= golc ; argv += golc ; + if (wgola[GOLA_VERBOSITY] && !uint0_scan(wgola[GOLA_VERBOSITY], &verbosity)) + strerr_dief1x(100, "verbosity needs to be an unsigned integer") ; + if (wgola[GOLA_DEPTH] && !uint0_scan(wgola[GOLA_DEPTH], &depth)) + strerr_dief1x(100, "pathinfo-depth needs to be an unsigned integer") ; + if (argc < 3) dieusage() ; + + remoteaddr = getenv("REMOTE_ADDR") ; + if (ip6_scan(remoteaddr, ip)) is6 = 1 ; + else if (ip4_scan(remoteaddr, ip)) is6 = 0 ; + else strerr_dieinvalid(100, "REMOTE_ADDR") ; + + rdlen = strlen(argv[0]) ; + char fn[rdlen + IP6_FMT + 16] ; + + memcpy(fn + m, argv[0], rdlen) ; m += rdlen ; + memcpy(fn + m, "/ip", 3) ; m += 3 ; + fn[m++] = is6 ? 
'6' : '4' ; + fn[m++] = '/' ; + m += is6 ? ip6_fmt(fn + m, ip) : ip4_fmt(fn + m, ip) ; + fn[m++] = '_' ; + memcpy(fn + m, is6 ? "128" : "32", is6 ? 3 : 2) ; m += 2 + is6 ; + fn[m] = 0 ; + memcpy(fn + m, "/allow", 7) ; + if (access(fn, W_OK) == 0) goto allow ; + if (errno != ENOENT) strerr_diefu2sys(111, "access ", fn) ; + memcpy(fn + m + 1, "deny", 5) ; + if (access(fn, W_OK) == 0) goto deny ; + if (errno != ENOENT) strerr_diefu2sys(111, "access ", fn) ; + fn[m] = 0 ; + + x = getenv("PATH_INFO") ; + if (!x) goto writeandallow ; + if (byte_count(x, strlen(x), '/') <= depth) goto writeandallow ; + x = getenv("QUERY_STRING") ; + if (!x) goto writeandallow ; + + { + regex_t re ; + int e = regcomp(&re, argv[1], REG_EXTENDED) ; + if (e == REG_BADPAT) strerr_dief2x(100, "invalid regex: ", argv[1]) ; + if (e) + { + char fmt[INT_FMT] ; + fmt[int_fmt(fmt, e)] = 0 ; + strerr_diefu4x(111, "regcomp ", argv[1], ": error code is ", fmt) ; + } + e = regexec(&re, x, 0, 0, REG_NOSUB) ; + if (e == 0) goto writeanddeny ; + if (e != REG_NOMATCH) + { + char fmt[INT_FMT] ; + fmt[int_fmt(fmt, e)] = 0 ; + strerr_diefu4x(111, "regexec ", argv[1], ": error code is ", fmt) ; + } + // regfree(&re) ; + } + + writeandallow: + if (symlink("../outputs/allow", fn) == -1) + { + if (errno != EEXIST) strerr_diefu4sys(111, "symlink ", "../outputs/allow", " to ", fn) ; + } + if (verbosity >= 2) + { + x = getenv("REQUEST_URI") ; + strerr_warni("allowing ", remoteaddr, " requesting ", x) ; + } + allow: + exec(argv + 2) ; + strerr_dieexec(111, argv[2]) ; + + writeanddeny: + if (symlink("../outputs/deny", fn) == -1) + { + if (errno != EEXIST) strerr_diefu4sys(111, "symlink ", "../outputs/deny", " to ", fn) ; + } + deny: + buffer_putsflush(buffer_1small, "Status: 403 Go fuck yourself, crawler\nContent-Length: 0\n\n") ; + _exit(0) ; +} diff --git a/src/misc/deps-exe/cgit-nollmcrawler b/src/misc/deps-exe/cgit-nollmcrawler new file mode 100644 index 0000000..e7187fe --- /dev/null +++ 
b/src/misc/deps-exe/cgit-nollmcrawler @@ -0,0 +1 @@ +-lskarnet diff --git a/src/misc/tipidee-logaggregate.c b/src/misc/tipidee-logaggregate.c index 83cda5e..7b61579 100644 --- a/src/misc/tipidee-logaggregate.c +++ b/src/misc/tipidee-logaggregate.c @@ -29,8 +29,7 @@ enum golb_e { - GOLB_IS6, - GOLB_N + GOLB_IS6 = 0x01 } ; static uint64_t wgolb = 0 ; @@ -102,7 +101,7 @@ static void pidip_add (pid_t pid, char const *ip) if (!gensetdyn_new(&pidip_list, &d)) dienomem() ; pidip *p = GENSETDYN_P(pidip, &pidip_list, d) ; p->pid = pid ; - memcpy(p->ip, ip, wgolb & (1 << GOLB_IS6) ? 16 : 4) ; + memcpy(p->ip, ip, wgolb & GOLB_IS6 ? 16 : 4) ; if (!avltree_insert(&pidip_map, d)) dienomem() ; } @@ -127,7 +126,7 @@ static void *ipinfo_dtok (uint32_t d, void *data) static int ip_cmp (void const *a, void const *b, void *data) { (void)data ; - return memcmp(a, b, wgolb & (1 << GOLB_IS6) ? 16 : 4) ; + return memcmp(a, b, wgolb & GOLB_IS6 ? 16 : 4) ; } @@ -164,7 +163,7 @@ static void add_request (pid_t pid, char const *host, char const *path, char con if (!avltree_search(&ipinfo_map, p->ip, &d)) { ipinfo i = IPINFO_ZERO ; - memcpy(i.ip, p->ip, wgolb & (1 << GOLB_IS6) ? 16 : 4) ; + memcpy(i.ip, p->ip, wgolb & GOLB_IS6 ? 
16 : 4) ; d = genalloc_len(ipinfo, &ipinfo_list) ; if (!genalloc_append(ipinfo, &ipinfo_list, &i)) dienomem() ; if (!avltree_insert(&ipinfo_map, d)) dienomem() ; @@ -298,7 +297,7 @@ static void parse_start (pid_t pid, char const *host, char *s) { char fmtline[UINT32_FMT] ; fmtline[uint32_fmt(fmtline, line)] = 0 ; - strerr_warnw("line ", fmtline, ": invalid ipv6") ; + strerr_warnw("line ", fmtline, ": invalid ipv6: ", s) ; return ; } } @@ -308,7 +307,7 @@ static void parse_start (pid_t pid, char const *host, char *s) { char fmtline[UINT32_FMT] ; fmtline[uint32_fmt(fmtline, line)] = 0 ; - strerr_warnw("line ", fmtline, ": invalid ipv6") ; + strerr_warnw("line ", fmtline, ": invalid ipv4") ; return ; } } @@ -376,8 +375,8 @@ static int print_iter (uint32_t d, unsigned int h, void *data) static gol_bool const rgolb[2] = { - { .so = '4', .lo = "ipv4", .clear = 1 << GOLB_IS6, .set = 0 }, - { .so = '6', .lo = "ipv6", .clear = 0, .set = 1 << GOLB_IS6 } + { .so = '4', .lo = "ipv4", .clear = GOLB_IS6, .set = 0 }, + { .so = '6', .lo = "ipv6", .clear = 0, .set = GOLB_IS6 } } ; int main (int argc, char const *const *argv) diff --git a/src/tipideed/cgi.c b/src/tipideed/cgi.c index d5daf56..14c6de8 100644 --- a/src/tipideed/cgi.c +++ b/src/tipideed/cgi.c @@ -320,7 +320,7 @@ static inline int do_cgi (tipidee_rql *rql, char const *docroot, char const *con else { if (!status) status = 200 ; - if (status != 304 && !tipidee_headers_search(&hdr, "Content-Type")) + if (status != 304 && (status < 400 || status > 599) && !tipidee_headers_search(&hdr, "Content-Type")) die502x(rql, 2, docroot, "cgi ", argv[0], " didn't output a ", "Content-Type", " header") ; } diff --git a/src/tipideed/stream.c b/src/tipideed/stream.c index 5d368b2..9774812 100644 --- a/src/tipideed/stream.c +++ b/src/tipideed/stream.c @@ -52,14 +52,14 @@ static ssize_t fixed_get (void *b) void cork (int fd) { static int const val = 1 ; - if (setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) + if (!g.ssl 
&& setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) strerr_warnwu1sys("uncork stdout") ; } void uncork (int fd) { static int const val = 0 ; - if (setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) + if (!g.ssl && setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) strerr_warnwu1sys("uncork stdout") ; } |
