diff options
| author | Laurent Bercot <ska-skaware@skarnet.org> | 2025-10-09 17:14:56 +0000 |
|---|---|---|
| committer | Laurent Bercot <ska-skaware@skarnet.org> | 2025-10-09 17:14:56 +0000 |
| commit | 46f4d80c4c40559a383d001838d766b2b438f4f0 (patch) | |
| tree | a1471d419df12d24edea840df64da269a656ca33 | |
| parent | eb56bacc8f5fff95cb752a7f8b3aefe8ed3f3d9a (diff) | |
| download | tipidee-46f4d80c4c40559a383d001838d766b2b438f4f0.tar.gz | |
Move string cmp operations to skalibs. Add cgit-nollmcrawler.
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | package/deps.mak | 6 | ||||
| -rw-r--r-- | package/modes | 1 | ||||
| -rw-r--r-- | package/targets.mak | 3 | ||||
| -rw-r--r-- | src/config/deps-exe/tipidee-config | 1 | ||||
| -rw-r--r-- | src/config/tipidee-config-internal.h | 7 | ||||
| -rw-r--r-- | src/config/util.c | 15 | ||||
| -rw-r--r-- | src/misc/cgit-nollmcrawler.c | 140 | ||||
| -rw-r--r-- | src/misc/deps-exe/cgit-nollmcrawler | 1 | ||||
| -rw-r--r-- | src/misc/tipidee-logaggregate.c | 17 | ||||
| -rw-r--r-- | src/tipideed/cgi.c | 2 | ||||
| -rw-r--r-- | src/tipideed/stream.c | 4 |
12 files changed, 162 insertions, 36 deletions
@@ -11,3 +11,4 @@ /tipidee-config-preprocess /tipidee-logaggregate /ls.cgi +/cgit-nollmcrawler.cgi diff --git a/package/deps.mak b/package/deps.mak index 83dacd8..ad04202 100644 --- a/package/deps.mak +++ b/package/deps.mak @@ -16,7 +16,6 @@ src/config/node.o src/config/node.lo: src/config/node.c src/config/tipidee-confi src/config/repo.o src/config/repo.lo: src/config/repo.c src/config/tipidee-config-internal.h src/config/tipidee-config-preprocess.o src/config/tipidee-config-preprocess.lo: src/config/tipidee-config-preprocess.c src/config/tipidee-config.o src/config/tipidee-config.lo: src/config/tipidee-config.c src/config/tipidee-config-internal.h src/include/tipidee/config.h -src/config/util.o src/config/util.lo: src/config/util.c src/config/tipidee-config-internal.h src/libtipidee/tipidee_conf_free.o src/libtipidee/tipidee_conf_free.lo: src/libtipidee/tipidee_conf_free.c src/include/tipidee/conf.h src/libtipidee/tipidee_conf_get.o src/libtipidee/tipidee_conf_get.lo: src/libtipidee/tipidee_conf_get.c src/include/tipidee/conf.h src/libtipidee/tipidee_conf_get_argv.o src/libtipidee/tipidee_conf_get_argv.lo: src/libtipidee/tipidee_conf_get_argv.c src/include/tipidee/conf.h @@ -64,6 +63,7 @@ src/libtipidee/tipidee_util_defaulttext.o src/libtipidee/tipidee_util_defaulttex src/libtipidee/tipidee_util_htmlescape.o src/libtipidee/tipidee_util_htmlescape.lo: src/libtipidee/tipidee_util_htmlescape.c src/include/tipidee/util.h src/libtipidee/tipidee_util_httpdate.o src/libtipidee/tipidee_util_httpdate.lo: src/libtipidee/tipidee_util_httpdate.c src/include/tipidee/util.h src/libtipidee/tipidee_util_parse_range.o src/libtipidee/tipidee_util_parse_range.lo: src/libtipidee/tipidee_util_parse_range.c src/include/tipidee/util.h +src/misc/cgit-nollmcrawler.o src/misc/cgit-nollmcrawler.lo: src/misc/cgit-nollmcrawler.c src/misc/ls.cgi.o src/misc/ls.cgi.lo: src/misc/ls.cgi.c src/include/tipidee/response.h src/include/tipidee/util.h src/misc/tipidee-logaggregate.o src/misc/tipidee-logaggregate.lo: src/misc/tipidee-logaggregate.c src/tipideed/cgi.o src/tipideed/cgi.lo: src/tipideed/cgi.c src/include/tipidee/tipidee.h src/tipideed/tipideed-internal.h @@ -80,7 +80,7 @@ src/tipideed/trace.o src/tipideed/trace.lo: src/tipideed/trace.c src/include/tip src/tipideed/util.o src/tipideed/util.lo: src/tipideed/util.c src/tipideed/tipideed-internal.h tipidee-config: EXTRA_LIBS := ${SPAWN_LIB} -tipidee-config: src/config/tipidee-config.o src/config/util.o src/config/node.o src/config/repo.o src/config/conftree.o src/config/headers.o src/config/defaults.o src/config/lexparse.o -lskarnet +tipidee-config: src/config/tipidee-config.o src/config/node.o src/config/repo.o src/config/conftree.o src/config/headers.o src/config/defaults.o src/config/lexparse.o -lskarnet tipidee-config-preprocess: EXTRA_LIBS := tipidee-config-preprocess: src/config/tipidee-config-preprocess.o -lskarnet ifeq ($(strip $(STATIC_LIBS_ARE_PIC)),) @@ -93,6 +93,8 @@ libtipidee.so.xyzzy: EXTRA_LIBS := -lskarnet libtipidee.so.xyzzy:src/libtipidee/tipidee_conf_free.lo src/libtipidee/tipidee_conf_get.lo src/libtipidee/tipidee_conf_get_argv.lo src/libtipidee/tipidee_conf_get_content_type.lo src/libtipidee/tipidee_conf_get_errorfile.lo src/libtipidee/tipidee_conf_get_redirection.lo src/libtipidee/tipidee_conf_get_resattr.lo src/libtipidee/tipidee_conf_get_resattr1.lo src/libtipidee/tipidee_conf_get_responseheaders.lo src/libtipidee/tipidee_conf_get_string.lo src/libtipidee/tipidee_conf_get_uint32.lo src/libtipidee/tipidee_conf_init.lo src/libtipidee/tipidee_headers_get_content_length.lo src/libtipidee/tipidee_headers_init.lo src/libtipidee/tipidee_headers_parse.lo src/libtipidee/tipidee_headers_search.lo src/libtipidee/tipidee_log_answer.lo src/libtipidee/tipidee_log_exit.lo src/libtipidee/tipidee_log_request.lo src/libtipidee/tipidee_log_resource.lo src/libtipidee/tipidee_log_start.lo src/libtipidee/tipidee_method.lo src/libtipidee/tipidee_response_error_nofile.lo src/libtipidee/tipidee_response_error_nofile_G.lo src/libtipidee/tipidee_response_file.lo src/libtipidee/tipidee_response_file_G.lo src/libtipidee/tipidee_response_header_date.lo src/libtipidee/tipidee_response_header_date_G.lo src/libtipidee/tipidee_response_header_date_fmt.lo src/libtipidee/tipidee_response_header_end.lo src/libtipidee/tipidee_response_header_lastmodified.lo src/libtipidee/tipidee_response_header_preparebuiltin.lo src/libtipidee/tipidee_response_header_write.lo src/libtipidee/tipidee_response_header_writeall.lo src/libtipidee/tipidee_response_header_writeall_G.lo src/libtipidee/tipidee_response_header_writemerge.lo src/libtipidee/tipidee_response_header_writemerge_G.lo src/libtipidee/tipidee_response_partial.lo src/libtipidee/tipidee_response_partial_G.lo src/libtipidee/tipidee_response_status.lo src/libtipidee/tipidee_rql_read.lo src/libtipidee/tipidee_uri_parse.lo src/libtipidee/tipidee_util_chunked_read.lo src/libtipidee/tipidee_util_defaulttext.lo src/libtipidee/tipidee_util_htmlescape.lo src/libtipidee/tipidee_util_httpdate.lo src/libtipidee/tipidee_util_parse_range.lo libtipidee.dylib.xyzzy: EXTRA_LIBS := -lskarnet libtipidee.dylib.xyzzy:src/libtipidee/tipidee_conf_free.lo src/libtipidee/tipidee_conf_get.lo src/libtipidee/tipidee_conf_get_argv.lo src/libtipidee/tipidee_conf_get_content_type.lo src/libtipidee/tipidee_conf_get_errorfile.lo src/libtipidee/tipidee_conf_get_redirection.lo src/libtipidee/tipidee_conf_get_resattr.lo src/libtipidee/tipidee_conf_get_resattr1.lo src/libtipidee/tipidee_conf_get_responseheaders.lo src/libtipidee/tipidee_conf_get_string.lo src/libtipidee/tipidee_conf_get_uint32.lo src/libtipidee/tipidee_conf_init.lo src/libtipidee/tipidee_headers_get_content_length.lo src/libtipidee/tipidee_headers_init.lo src/libtipidee/tipidee_headers_parse.lo src/libtipidee/tipidee_headers_search.lo src/libtipidee/tipidee_log_answer.lo src/libtipidee/tipidee_log_exit.lo src/libtipidee/tipidee_log_request.lo src/libtipidee/tipidee_log_resource.lo src/libtipidee/tipidee_log_start.lo src/libtipidee/tipidee_method.lo src/libtipidee/tipidee_response_error_nofile.lo src/libtipidee/tipidee_response_error_nofile_G.lo src/libtipidee/tipidee_response_file.lo src/libtipidee/tipidee_response_file_G.lo src/libtipidee/tipidee_response_header_date.lo src/libtipidee/tipidee_response_header_date_G.lo src/libtipidee/tipidee_response_header_date_fmt.lo src/libtipidee/tipidee_response_header_end.lo src/libtipidee/tipidee_response_header_lastmodified.lo src/libtipidee/tipidee_response_header_preparebuiltin.lo src/libtipidee/tipidee_response_header_write.lo src/libtipidee/tipidee_response_header_writeall.lo src/libtipidee/tipidee_response_header_writeall_G.lo src/libtipidee/tipidee_response_header_writemerge.lo src/libtipidee/tipidee_response_header_writemerge_G.lo src/libtipidee/tipidee_response_partial.lo src/libtipidee/tipidee_response_partial_G.lo src/libtipidee/tipidee_response_status.lo src/libtipidee/tipidee_rql_read.lo src/libtipidee/tipidee_uri_parse.lo src/libtipidee/tipidee_util_chunked_read.lo src/libtipidee/tipidee_util_defaulttext.lo src/libtipidee/tipidee_util_htmlescape.lo src/libtipidee/tipidee_util_httpdate.lo src/libtipidee/tipidee_util_parse_range.lo +cgit-nollmcrawler: EXTRA_LIBS := +cgit-nollmcrawler: src/misc/cgit-nollmcrawler.o -lskarnet ls.cgi: EXTRA_LIBS := ls.cgi: src/misc/ls.cgi.o libtipidee.a.xyzzy -lskarnet tipidee-logaggregate: EXTRA_LIBS := diff --git a/package/modes b/package/modes index 2bdeb12..9602f3f 100644 --- a/package/modes +++ b/package/modes @@ -3,3 +3,4 @@ tipidee-config-preprocess 0755 tipidee-config 0755 ls.cgi 0755 tipidee-logaggregate 0755 +cgit-nollmcrawler 0755 diff --git a/package/targets.mak b/package/targets.mak index 02780d7..87d871a 100644 --- a/package/targets.mak +++ b/package/targets.mak @@ -1,7 +1,8 @@ BIN_TARGETS := \ tipideed \ tipidee-config \ -tipidee-logaggregate +tipidee-logaggregate \ +cgit-nollmcrawler LIBEXEC_TARGETS := \ tipidee-config-preprocess \ diff --git a/src/config/deps-exe/tipidee-config b/src/config/deps-exe/tipidee-config index 2e844d8..55cf760 100644 --- a/src/config/deps-exe/tipidee-config +++ b/src/config/deps-exe/tipidee-config @@ -1,4 +1,3 @@ -util.o node.o repo.o conftree.o diff --git a/src/config/tipidee-config-internal.h b/src/config/tipidee-config-internal.h index d3977c0..c9849f2 100644 --- a/src/config/tipidee-config-internal.h +++ b/src/config/tipidee-config-internal.h @@ -7,6 +7,7 @@ #include <string.h> #include <stdlib.h> +#include <skalibs/bytestr.h> #include <skalibs/buffer.h> #include <skalibs/strerr.h> #include <skalibs/stralloc.h> @@ -45,11 +46,7 @@ struct global_s extern struct global_s g ; - - /* util */ - -extern int keycmp (void const *, void const *) ; /* for any struct starting with a string key */ -#define BSEARCH(type, key, array) bsearch(key, (array), sizeof(array)/sizeof(type), sizeof(type), &keycmp) +#define BSEARCH(type, key, array) bsearch(key, (array), sizeof(array)/sizeof(type), sizeof(type), &stringkey_bcmp) /* node */ diff --git a/src/config/util.c b/src/config/util.c deleted file mode 100644 index bee1503..0000000 --- a/src/config/util.c +++ /dev/null @@ -1,15 +0,0 @@ -/* ISC license. */ - -#include <string.h> - -#include "tipidee-config-internal.h" - -struct starts_with_a_string_key_s -{ - char const *s ; -} ; - -int keycmp (void const *a, void const *b) -{ - return strcmp((char const *)a, ((struct starts_with_a_string_key_s const *)b)->s) ; -} diff --git a/src/misc/cgit-nollmcrawler.c b/src/misc/cgit-nollmcrawler.c new file mode 100644 index 0000000..71d31ae --- /dev/null +++ b/src/misc/cgit-nollmcrawler.c @@ -0,0 +1,140 @@ +/* ISC license. */ + +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <regex.h> + +#include <skalibs/posixplz.h> +#include <skalibs/bytestr.h> +#include <skalibs/stat.h> +#include <skalibs/prog.h> +#include <skalibs/buffer.h> +#include <skalibs/strerr.h> +#include <skalibs/gol.h> +#include <skalibs/fmtscan.h> +#include <skalibs/exec.h> + +#define USAGE "as a CGI script: cgit-nollmcrawler [ -v verbosity ] [ -d pathinfodepth ] rulesdir badregex realcgit..." +#define dieusage() strerr_dieusage(100, USAGE) + +enum golb_e +{ + GOLB_FORCE = 0x01 +} ; + +static gol_bool const rgolb[] = +{ + { .so = 'f', .lo = "force", .clear = 0, .set = GOLB_FORCE } +} ; + +enum gola_e +{ + GOLA_VERBOSITY, + GOLA_DEPTH, + GOLA_N +} ; + +static gol_arg const rgola[] = +{ + { .so = 'v', .lo = "verbosity", .i = GOLA_VERBOSITY }, + { .so = 'd', .lo = "pathinfo-depth", .i = GOLA_DEPTH } +} ; + +int main (int argc, char const *const *argv) +{ + unsigned int verbosity = 1 ; + char const *wgola[GOLA_N] = { 0 } ; + uint64_t wgolb = 0 ; + unsigned int golc ; + unsigned int depth = 1 ; + char const *remoteaddr ; + char const *x ; + size_t rdlen, m = 0 ; + char ip[16] ; + int is6 ; + PROG = "cgit-nollmcrawler" ; + + golc = GOL_main(argc, argv, rgolb, rgola, &wgolb, wgola) ; + argc -= golc ; argv += golc ; + if (wgola[GOLA_VERBOSITY] && !uint0_scan(wgola[GOLA_VERBOSITY], &verbosity)) + strerr_dief1x(100, "verbosity needs to be an unsigned integer") ; + if (wgola[GOLA_DEPTH] && !uint0_scan(wgola[GOLA_DEPTH], &depth)) + strerr_dief1x(100, "pathinfo-depth needs to be an unsigned integer") ; + if (argc < 3) dieusage() ; + + remoteaddr = getenv("REMOTE_ADDR") ; + if (ip6_scan(remoteaddr, ip)) is6 = 1 ; + else if (ip4_scan(remoteaddr, ip)) is6 = 0 ; + else strerr_dieinvalid(100, "REMOTE_ADDR") ; + + rdlen = strlen(argv[0]) ; + char fn[rdlen + IP6_FMT + 16] ; + + memcpy(fn + m, argv[0], rdlen) ; m += rdlen ; + memcpy(fn + m, "/ip", 3) ; m += 3 ; + fn[m++] = is6 ? '6' : '4' ; + fn[m++] = '/' ; + m += is6 ? ip6_fmt(fn + m, ip) : ip4_fmt(fn + m, ip) ; + fn[m++] = '_' ; + memcpy(fn + m, is6 ? "128" : "32", is6 ? 3 : 2) ; m += 2 + is6 ; + fn[m] = 0 ; + memcpy(fn + m, "/allow", 7) ; + if (access(fn, W_OK) == 0) goto allow ; + if (errno != ENOENT) strerr_diefu2sys(111, "access ", fn) ; + memcpy(fn + m + 1, "deny", 5) ; + if (access(fn, W_OK) == 0) goto deny ; + if (errno != ENOENT) strerr_diefu2sys(111, "access ", fn) ; + fn[m] = 0 ; + + x = getenv("PATH_INFO") ; + if (!x) goto writeandallow ; + if (byte_count(x, strlen(x), '/') <= depth) goto writeandallow ; + x = getenv("QUERY_STRING") ; + if (!x) goto writeandallow ; + + { + regex_t re ; + int e = regcomp(&re, argv[1], REG_EXTENDED) ; + if (e == REG_BADPAT) strerr_dief2x(100, "invalid regex: ", argv[1]) ; + if (e) + { + char fmt[INT_FMT] ; + fmt[int_fmt(fmt, e)] = 0 ; + strerr_diefu4x(111, "regcomp ", argv[1], ": error code is ", fmt) ; + } + e = regexec(&re, x, 0, 0, REG_NOSUB) ; + if (e == 0) goto writeanddeny ; + if (e != REG_NOMATCH) + { + char fmt[INT_FMT] ; + fmt[int_fmt(fmt, e)] = 0 ; + strerr_diefu4x(111, "regexec ", argv[1], ": error code is ", fmt) ; + } + // regfree(&re) ; + } + + writeandallow: + if (symlink("../outputs/allow", fn) == -1) + { + if (errno != EEXIST) strerr_diefu4sys(111, "symlink ", "../outputs/allow", " to ", fn) ; + } + if (verbosity >= 2) + { + x = getenv("REQUEST_URI") ; + strerr_warni("allowing ", remoteaddr, " requesting ", x) ; + } + allow: + exec(argv + 2) ; + strerr_dieexec(111, argv[2]) ; + + writeanddeny: + if (symlink("../outputs/deny", fn) == -1) + { + if (errno != EEXIST) strerr_diefu4sys(111, "symlink ", "../outputs/deny", " to ", fn) ; + } + deny: + buffer_putsflush(buffer_1small, "Status: 403 Go fuck yourself, crawler\nContent-Length: 0\n\n") ; + _exit(0) ; +} diff --git a/src/misc/deps-exe/cgit-nollmcrawler b/src/misc/deps-exe/cgit-nollmcrawler new file mode 100644 index 0000000..e7187fe --- /dev/null +++ b/src/misc/deps-exe/cgit-nollmcrawler @@ -0,0 +1 @@ +-lskarnet diff --git a/src/misc/tipidee-logaggregate.c b/src/misc/tipidee-logaggregate.c index 83cda5e..7b61579 100644 --- a/src/misc/tipidee-logaggregate.c +++ b/src/misc/tipidee-logaggregate.c @@ -29,8 +29,7 @@ enum golb_e { - GOLB_IS6, - GOLB_N + GOLB_IS6 = 0x01 } ; static uint64_t wgolb = 0 ; @@ -102,7 +101,7 @@ static void pidip_add (pid_t pid, char const *ip) if (!gensetdyn_new(&pidip_list, &d)) dienomem() ; pidip *p = GENSETDYN_P(pidip, &pidip_list, d) ; p->pid = pid ; - memcpy(p->ip, ip, wgolb & (1 << GOLB_IS6) ? 16 : 4) ; + memcpy(p->ip, ip, wgolb & GOLB_IS6 ? 16 : 4) ; if (!avltree_insert(&pidip_map, d)) dienomem() ; } @@ -127,7 +126,7 @@ static void *ipinfo_dtok (uint32_t d, void *data) static int ip_cmp (void const *a, void const *b, void *data) { (void)data ; - return memcmp(a, b, wgolb & (1 << GOLB_IS6) ? 16 : 4) ; + return memcmp(a, b, wgolb & GOLB_IS6 ? 16 : 4) ; } @@ -164,7 +163,7 @@ static void add_request (pid_t pid, char const *host, char const *path, char con if (!avltree_search(&ipinfo_map, p->ip, &d)) { ipinfo i = IPINFO_ZERO ; - memcpy(i.ip, p->ip, wgolb & (1 << GOLB_IS6) ? 16 : 4) ; + memcpy(i.ip, p->ip, wgolb & GOLB_IS6 ? 16 : 4) ; d = genalloc_len(ipinfo, &ipinfo_list) ; if (!genalloc_append(ipinfo, &ipinfo_list, &i)) dienomem() ; if (!avltree_insert(&ipinfo_map, d)) dienomem() ; @@ -298,7 +297,7 @@ static void parse_start (pid_t pid, char const *host, char *s) { char fmtline[UINT32_FMT] ; fmtline[uint32_fmt(fmtline, line)] = 0 ; - strerr_warnw("line ", fmtline, ": invalid ipv6") ; + strerr_warnw("line ", fmtline, ": invalid ipv6: ", s) ; return ; } } @@ -308,7 +307,7 @@ static void parse_start (pid_t pid, char const *host, char *s) { char fmtline[UINT32_FMT] ; fmtline[uint32_fmt(fmtline, line)] = 0 ; - strerr_warnw("line ", fmtline, ": invalid ipv6") ; + strerr_warnw("line ", fmtline, ": invalid ipv4") ; return ; } } @@ -376,8 +375,8 @@ static int print_iter (uint32_t d, unsigned int h, void *data) static gol_bool const rgolb[2] = { - { .so = '4', .lo = "ipv4", .clear = 1 << GOLB_IS6, .set = 0 }, - { .so = '6', .lo = "ipv6", .clear = 0, .set = 1 << GOLB_IS6 } + { .so = '4', .lo = "ipv4", .clear = GOLB_IS6, .set = 0 }, + { .so = '6', .lo = "ipv6", .clear = 0, .set = GOLB_IS6 } } ; int main (int argc, char const *const *argv) diff --git a/src/tipideed/cgi.c b/src/tipideed/cgi.c index d5daf56..14c6de8 100644 --- a/src/tipideed/cgi.c +++ b/src/tipideed/cgi.c @@ -320,7 +320,7 @@ static inline int do_cgi (tipidee_rql *rql, char const *docroot, char const *con else { if (!status) status = 200 ; - if (status != 304 && !tipidee_headers_search(&hdr, "Content-Type")) + if (status != 304 && (status < 400 || status > 599) && !tipidee_headers_search(&hdr, "Content-Type")) die502x(rql, 2, docroot, "cgi ", argv[0], " didn't output a ", "Content-Type", " header") ; } diff --git a/src/tipideed/stream.c b/src/tipideed/stream.c index 5d368b2..9774812 100644 --- a/src/tipideed/stream.c +++ b/src/tipideed/stream.c @@ -52,14 +52,14 @@ static ssize_t fixed_get (void *b) void cork (int fd) { static int const val = 1 ; - if (setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) + if (!g.ssl && setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) strerr_warnwu1sys("uncork stdout") ; } void uncork (int fd) { static int const val = 0 ; - if (setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) + if (!g.ssl && setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) strerr_warnwu1sys("uncork stdout") ; } |
