about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Laurent Bercot <ska-skaware@skarnet.org>, 2025-10-09 17:14:56 +0000
committer: Laurent Bercot <ska-skaware@skarnet.org>, 2025-10-09 17:14:56 +0000
commit: 46f4d80c4c40559a383d001838d766b2b438f4f0 (patch)
tree: a1471d419df12d24edea840df64da269a656ca33 /src
parent: eb56bacc8f5fff95cb752a7f8b3aefe8ed3f3d9a (diff)
download: tipidee-46f4d80c4c40559a383d001838d766b2b438f4f0.tar.gz
Move string cmp operations to skalibs. Add cgit-nollmcrawler.
Diffstat (limited to 'src')
-rw-r--r-- src/config/deps-exe/tipidee-config | 1
-rw-r--r-- src/config/tipidee-config-internal.h | 7
-rw-r--r-- src/config/util.c | 15
-rw-r--r-- src/misc/cgit-nollmcrawler.c | 140
-rw-r--r-- src/misc/deps-exe/cgit-nollmcrawler | 1
-rw-r--r-- src/misc/tipidee-logaggregate.c | 17
-rw-r--r-- src/tipideed/cgi.c | 2
-rw-r--r-- src/tipideed/stream.c | 4
8 files changed, 154 insertions, 33 deletions
diff --git a/src/config/deps-exe/tipidee-config b/src/config/deps-exe/tipidee-config
index 2e844d8..55cf760 100644
--- a/src/config/deps-exe/tipidee-config
+++ b/src/config/deps-exe/tipidee-config
@@ -1,4 +1,3 @@
-util.o
node.o
repo.o
conftree.o
diff --git a/src/config/tipidee-config-internal.h b/src/config/tipidee-config-internal.h
index d3977c0..c9849f2 100644
--- a/src/config/tipidee-config-internal.h
+++ b/src/config/tipidee-config-internal.h
@@ -7,6 +7,7 @@
#include <string.h>
#include <stdlib.h>
+#include <skalibs/bytestr.h>
#include <skalibs/buffer.h>
#include <skalibs/strerr.h>
#include <skalibs/stralloc.h>
@@ -45,11 +46,7 @@ struct global_s
extern struct global_s g ;
-
- /* util */
-
-extern int keycmp (void const *, void const *) ; /* for any struct starting with a string key */
-#define BSEARCH(type, key, array) bsearch(key, (array), sizeof(array)/sizeof(type), sizeof(type), &keycmp)
+#define BSEARCH(type, key, array) bsearch(key, (array), sizeof(array)/sizeof(type), sizeof(type), &stringkey_bcmp) /* array must be a real array of type, not a pointer: the element count comes from sizeof */
/* node */
diff --git a/src/config/util.c b/src/config/util.c
deleted file mode 100644
index bee1503..0000000
--- a/src/config/util.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/* ISC license. */
-
-#include <string.h>
-
-#include "tipidee-config-internal.h"
-
-struct starts_with_a_string_key_s
-{
- char const *s ;
-} ;
-
-int keycmp (void const *a, void const *b)
-{
- return strcmp((char const *)a, ((struct starts_with_a_string_key_s const *)b)->s) ;
-}
diff --git a/src/misc/cgit-nollmcrawler.c b/src/misc/cgit-nollmcrawler.c
new file mode 100644
index 0000000..71d31ae
--- /dev/null
+++ b/src/misc/cgit-nollmcrawler.c
@@ -0,0 +1,140 @@
+/* ISC license. */
+
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <regex.h>
+
+#include <skalibs/posixplz.h>
+#include <skalibs/bytestr.h>
+#include <skalibs/stat.h>
+#include <skalibs/prog.h>
+#include <skalibs/buffer.h>
+#include <skalibs/strerr.h>
+#include <skalibs/gol.h>
+#include <skalibs/fmtscan.h>
+#include <skalibs/exec.h>
+
+#define USAGE "as a CGI script: cgit-nollmcrawler [ -v verbosity ] [ -d pathinfodepth ] rulesdir badregex realcgit..."
+#define dieusage() strerr_dieusage(100, USAGE)
+
+enum golb_e /* flag bits for the boolean options; used as the .set value in rgolb below */
+{
+ GOLB_FORCE = 0x01 /* tied to -f / --force; nothing in this file reads it back from wgolb */
+} ;
+
+static gol_bool const rgolb[] = /* boolean option table that main() passes to GOL_main() */
+{
+ { .so = 'f', .lo = "force", .clear = 0, .set = GOLB_FORCE }
+} ;
+
+enum gola_e /* indices into the wgola[] array that main() reads after GOL_main() */
+{
+ GOLA_VERBOSITY,
+ GOLA_DEPTH,
+ GOLA_N /* count of the entries above; main() uses it as the size of wgola[] */
+} ;
+
+static gol_arg const rgola[] = /* options that take an argument; .i is presumably the wgola[] slot that receives it */
+{
+ { .so = 'v', .lo = "verbosity", .i = GOLA_VERBOSITY },
+ { .so = 'd', .lo = "pathinfo-depth", .i = GOLA_DEPTH }
+} ;
+
+/* CGI wrapper. After option parsing, argv[0] is rulesdir, argv[1] is badregex, argv + 2 is
+   the real CGI command line. An existing allow/deny rule for REMOTE_ADDR is obeyed; otherwise
+   a rule symlink is created (deny only if QUERY_STRING matches badregex) and then obeyed. */
+int main (int argc, char const *const *argv)
+{
+  unsigned int verbosity = 1 ;
+  char const *wgola[GOLA_N] = { 0 } ;
+  uint64_t wgolb = 0 ;
+  unsigned int golc ;
+  unsigned int depth = 1 ;
+  char const *remoteaddr ;
+  char const *x ;
+  size_t rdlen, m = 0 ;
+  char ip[16] ;
+  int is6 ;
+  PROG = "cgit-nollmcrawler" ;
+
+  golc = GOL_main(argc, argv, rgolb, rgola, &wgolb, wgola) ;
+  argc -= golc ; argv += golc ;
+  if (wgola[GOLA_VERBOSITY] && !uint0_scan(wgola[GOLA_VERBOSITY], &verbosity))
+    strerr_dief1x(100, "verbosity needs to be an unsigned integer") ;
+  if (wgola[GOLA_DEPTH] && !uint0_scan(wgola[GOLA_DEPTH], &depth))
+    strerr_dief1x(100, "pathinfo-depth needs to be an unsigned integer") ;
+  if (argc < 3) dieusage() ;
+
+  remoteaddr = getenv("REMOTE_ADDR") ;
+  if (!remoteaddr) strerr_dieinvalid(100, "REMOTE_ADDR") ; /* unset: never hand NULL to the scanners */
+  if (ip6_scan(remoteaddr, ip)) is6 = 1 ;
+  else if (ip4_scan(remoteaddr, ip)) is6 = 0 ;
+  else strerr_dieinvalid(100, "REMOTE_ADDR") ;
+
+  rdlen = strlen(argv[0]) ;
+  char fn[rdlen + IP6_FMT + 16] ;
+  /* Build fn = "<rulesdir>/ip4/<addr>_32" or "<rulesdir>/ip6/<addr>_128"; m tracks its length */
+  memcpy(fn + m, argv[0], rdlen) ; m += rdlen ;
+  memcpy(fn + m, "/ip", 3) ; m += 3 ;
+  fn[m++] = is6 ? '6' : '4' ;
+  fn[m++] = '/' ;
+  m += is6 ? ip6_fmt(fn + m, ip) : ip4_fmt(fn + m, ip) ;
+  fn[m++] = '_' ;
+  memcpy(fn + m, is6 ? "128" : "32", is6 ? 3 : 2) ; m += 2 + is6 ;
+  fn[m] = 0 ;
+  memcpy(fn + m, "/allow", 7) ; /* 7 bytes: the copy includes the terminating NUL */
+  if (access(fn, W_OK) == 0) goto allow ;
+  if (errno != ENOENT) strerr_diefu2sys(111, "access ", fn) ;
+  memcpy(fn + m + 1, "deny", 5) ; /* overwrite "allow" after the '/', NUL included */
+  if (access(fn, W_OK) == 0) goto deny ;
+  if (errno != ENOENT) strerr_diefu2sys(111, "access ", fn) ;
+  fn[m] = 0 ; /* cut the suffix off again: from here on fn is the rule path itself */
+  /* No rule yet. Allow when PATH_INFO is unset or has at most depth '/' bytes, or QUERY_STRING is unset */
+  x = getenv("PATH_INFO") ;
+  if (!x) goto writeandallow ;
+  if (byte_count(x, strlen(x), '/') <= depth) goto writeandallow ;
+  x = getenv("QUERY_STRING") ;
+  if (!x) goto writeandallow ;
+
+  {
+    regex_t re ;
+    int e = regcomp(&re, argv[1], REG_EXTENDED | REG_NOSUB) ; /* REG_NOSUB is a regcomp() flag */
+    if (e == REG_BADPAT) strerr_dief2x(100, "invalid regex: ", argv[1]) ;
+    if (e)
+    {
+      char fmt[INT_FMT] ;
+      fmt[int_fmt(fmt, e)] = 0 ;
+      strerr_diefu4x(111, "regcomp ", argv[1], ": error code is ", fmt) ;
+    }
+    e = regexec(&re, x, 0, 0, 0) ; /* eflags only takes REG_NOTBOL / REG_NOTEOL; none wanted here */
+    if (e == 0) goto writeanddeny ;
+    if (e != REG_NOMATCH)
+    {
+      char fmt[INT_FMT] ;
+      fmt[int_fmt(fmt, e)] = 0 ;
+      strerr_diefu4x(111, "regexec ", argv[1], ": error code is ", fmt) ;
+    }
+    /* re is deliberately not regfree()d: every path below ends in exec(), _exit() or a fatal error */
+  }
+
+ writeandallow:
+  if (symlink("../outputs/allow", fn) == -1 && errno != EEXIST) /* an already existing rule is fine */
+    strerr_diefu4sys(111, "symlink ", "../outputs/allow", " to ", fn) ;
+  if (verbosity >= 2)
+  {
+    x = getenv("REQUEST_URI") ;
+    strerr_warni("allowing ", remoteaddr, " requesting ", x) ;
+  }
+ allow:
+  exec(argv + 2) ;
+  strerr_dieexec(111, argv[2]) ;
+
+ writeanddeny:
+  if (symlink("../outputs/deny", fn) == -1 && errno != EEXIST) /* an already existing rule is fine */
+    strerr_diefu4sys(111, "symlink ", "../outputs/deny", " to ", fn) ;
+ deny:
+  buffer_putsflush(buffer_1small, "Status: 403 Go fuck yourself, crawler\nContent-Length: 0\n\n") ;
+  _exit(0) ;
+}
diff --git a/src/misc/deps-exe/cgit-nollmcrawler b/src/misc/deps-exe/cgit-nollmcrawler
new file mode 100644
index 0000000..e7187fe
--- /dev/null
+++ b/src/misc/deps-exe/cgit-nollmcrawler
@@ -0,0 +1 @@
+-lskarnet
diff --git a/src/misc/tipidee-logaggregate.c b/src/misc/tipidee-logaggregate.c
index 83cda5e..7b61579 100644
--- a/src/misc/tipidee-logaggregate.c
+++ b/src/misc/tipidee-logaggregate.c
@@ -29,8 +29,7 @@
enum golb_e
{
- GOLB_IS6,
- GOLB_N
+ GOLB_IS6 = 0x01 /* now a bit mask: tested directly as wgolb & GOLB_IS6, no 1 << shift */
} ;
static uint64_t wgolb = 0 ;
@@ -102,7 +101,7 @@ static void pidip_add (pid_t pid, char const *ip)
if (!gensetdyn_new(&pidip_list, &d)) dienomem() ;
pidip *p = GENSETDYN_P(pidip, &pidip_list, d) ;
p->pid = pid ;
- memcpy(p->ip, ip, wgolb & (1 << GOLB_IS6) ? 16 : 4) ;
+ memcpy(p->ip, ip, wgolb & GOLB_IS6 ? 16 : 4) ;
if (!avltree_insert(&pidip_map, d)) dienomem() ;
}
@@ -127,7 +126,7 @@ static void *ipinfo_dtok (uint32_t d, void *data)
static int ip_cmp (void const *a, void const *b, void *data)
{
(void)data ;
- return memcmp(a, b, wgolb & (1 << GOLB_IS6) ? 16 : 4) ;
+ return memcmp(a, b, wgolb & GOLB_IS6 ? 16 : 4) ; /* compares 16 bytes when the GOLB_IS6 bit is set in wgolb, 4 otherwise */
}
@@ -164,7 +163,7 @@ static void add_request (pid_t pid, char const *host, char const *path, char con
if (!avltree_search(&ipinfo_map, p->ip, &d))
{
ipinfo i = IPINFO_ZERO ;
- memcpy(i.ip, p->ip, wgolb & (1 << GOLB_IS6) ? 16 : 4) ;
+ memcpy(i.ip, p->ip, wgolb & GOLB_IS6 ? 16 : 4) ;
d = genalloc_len(ipinfo, &ipinfo_list) ;
if (!genalloc_append(ipinfo, &ipinfo_list, &i)) dienomem() ;
if (!avltree_insert(&ipinfo_map, d)) dienomem() ;
@@ -298,7 +297,7 @@ static void parse_start (pid_t pid, char const *host, char *s)
{
char fmtline[UINT32_FMT] ;
fmtline[uint32_fmt(fmtline, line)] = 0 ;
- strerr_warnw("line ", fmtline, ": invalid ipv6") ;
+ strerr_warnw("line ", fmtline, ": invalid ipv6: ", s) ;
return ;
}
}
@@ -308,7 +307,7 @@ static void parse_start (pid_t pid, char const *host, char *s)
{
char fmtline[UINT32_FMT] ;
fmtline[uint32_fmt(fmtline, line)] = 0 ;
- strerr_warnw("line ", fmtline, ": invalid ipv6") ;
+ strerr_warnw("line ", fmtline, ": invalid ipv4") ;
return ;
}
}
@@ -376,8 +375,8 @@ static int print_iter (uint32_t d, unsigned int h, void *data)
static gol_bool const rgolb[2] =
{
- { .so = '4', .lo = "ipv4", .clear = 1 << GOLB_IS6, .set = 0 },
- { .so = '6', .lo = "ipv6", .clear = 0, .set = 1 << GOLB_IS6 }
+ { .so = '4', .lo = "ipv4", .clear = GOLB_IS6, .set = 0 }, /* -4 / --ipv4: GOLB_IS6 is in the clear mask */
+ { .so = '6', .lo = "ipv6", .clear = 0, .set = GOLB_IS6 } /* -6 / --ipv6: GOLB_IS6 is in the set mask */
} ;
int main (int argc, char const *const *argv)
diff --git a/src/tipideed/cgi.c b/src/tipideed/cgi.c
index d5daf56..14c6de8 100644
--- a/src/tipideed/cgi.c
+++ b/src/tipideed/cgi.c
@@ -320,7 +320,7 @@ static inline int do_cgi (tipidee_rql *rql, char const *docroot, char const *con
else
{
if (!status) status = 200 ;
- if (status != 304 && !tipidee_headers_search(&hdr, "Content-Type"))
+ if (status != 304 && (status < 400 || status > 599) && !tipidee_headers_search(&hdr, "Content-Type"))
die502x(rql, 2, docroot, "cgi ", argv[0], " didn't output a ", "Content-Type", " header") ;
}
diff --git a/src/tipideed/stream.c b/src/tipideed/stream.c
index 5d368b2..9774812 100644
--- a/src/tipideed/stream.c
+++ b/src/tipideed/stream.c
@@ -52,14 +52,14 @@ static ssize_t fixed_get (void *b)
void cork (int fd)
{
static int const val = 1 ;
- if (setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv)
- strerr_warnwu1sys("uncork stdout") ;
+ if (!g.ssl && setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) /* TCP_CORK is set to 1 on fd unless g.ssl is set; a failure is only logged when g.logv is set */
+ strerr_warnwu1sys("cork stdout") ; /* name the operation that failed: the old text said "uncork", copied from uncork() */
}
void uncork (int fd)
{
static int const val = 0 ;
- if (setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv)
+ if (!g.ssl && setsockopt(fd, SOL_TCP, TCP_CORK, &val, sizeof(int)) == -1 && g.logv) /* TCP_CORK is set to 0 on fd unless g.ssl is set; a failure is only logged when g.logv is set */
strerr_warnwu1sys("uncork stdout") ;
}