--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+/* Adapted from phk@freebsd.dk's public domain implementation. */
+
+#include <string.h>
+#include "decoders.h"
+
+/* Base64 alphabet in value order: the character at index n encodes the
+   6-bit value n. */
+static const char *b64c =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+/* Reverse lookup table filled in by base64_init(): character code ->
+   6-bit value, -1 for characters that are not in the alphabet.
+   NOTE(review): plain char may be unsigned on some platforms, in which
+   case the -1 sentinel reads back as 255 -- confirm for the targets. */
+static char i64[256];
+
+/*
+ * Build the i64 reverse-lookup table.  Every slot starts out as -1
+ * (not a base64 character); each alphabet character is then mapped to
+ * its position in b64c, and '=' is mapped to 0.
+ */
+void
+base64_init(void)
+{
+    int slot, value;
+
+    for (slot = 0; slot < 256; slot++)
+        i64[slot] = -1;
+    for (value = 0; b64c[value] != '\0'; value++)
+        i64[(unsigned int)b64c[value]] = value;
+    i64['='] = 0;
+}
+
+/*
+ * Reset a decoder state so a new base64 stream can be decoded.
+ *
+ * b64s: caller-owned state to clear.
+ * Returns b64s, so the call can be used inline.
+ */
+struct b64_state *
+b64_start (struct b64_state *b64s)
+{
+    /* memset is declared by <string.h>, which this file includes;
+       the legacy bzero is not. */
+    memset(b64s, 0, sizeof(*b64s));
+    return b64s;
+}
+
+/*
+ * End a decode stream: clears the state, discarding any partially read
+ * group of characters.
+ */
+void
+b64_stop (struct b64_state *b64s)
+{
+    /* memset is declared by <string.h>, which this file includes;
+       the legacy bzero is not. */
+    memset(b64s, 0, sizeof(*b64s));
+}
+
+/*
+ * Decode a piece of base64 text, continuing from the state in b64s.
+ *
+ * b64s:   decoder state from b64_start(); carries a partially read
+ *         group of four characters from one call to the next.
+ * s, len: input characters.  CR and LF are skipped wherever they occur.
+ * out:    output buffer; size is its capacity in bytes.
+ * outlen: set to the number of bytes written to out.
+ *
+ * Returns the number of input characters consumed, or -1 if a character
+ * outside the base64 alphabet is found (the state is reset in that
+ * case).  Decoding stops early, with fewer than len characters consumed,
+ * once out has room for fewer than 3 more bytes; call again with the
+ * remaining input and a fresh buffer.
+ *
+ * XXX: '=' padding decodes as zero bits, so every group of four
+ * characters yields exactly 3 bytes and the last group of padded input
+ * ends in one or two extra NUL bytes.  For (valid) encoded text,
+ * strlen() will get the true length of output.  For binary, this won't
+ * work.
+ */
+int
+b64_decode(struct b64_state *b64s, char *s, int len,
+    char *out, int size, int *outlen)
+{
+    char *p = s;
+    char *end = s + len;
+    unsigned u = b64s->u;
+    int v = b64s->v;
+    int d, i;
+
+    *outlen = 0;
+
+    while (p < end) {
+        if (*p == '\r' || *p == '\n') {
+            p++;    /* line breaks carry no data */
+            continue;
+        }
+        if (size < 3)
+            break;  /* no room for another decoded group */
+
+        /* Index through unsigned char so bytes >= 0x80 stay inside the
+           table; the range check also catches a -1 sentinel that was
+           stored into an unsigned char table. */
+        d = i64[(unsigned char)*p++];
+        if (d < 0 || d > 63) {
+            b64s->u = 0;
+            b64s->v = 0;
+            return -1;
+        }
+        u = (u << 6) | (unsigned)d;
+        if (++v < 4)
+            continue;
+
+        /* Four characters collected: emit their 24 bits as 3 bytes. */
+        for (i = 0; i < 3; i++) {
+            *out++ = (u >> 16) & 0xff;
+            u <<= 8;
+        }
+        *outlen += 3;
+        size -= 3;
+        u = 0;
+        v = 0;
+    }
+
+    /* Keep any partial group for the next call. */
+    b64s->u = u;
+    b64s->v = v;
+    return (int)(p - s);
+}
+
+#ifdef _UNIT_TEST
+#include <stdio.h>
+
+/* Self-test: feed a sample encoding to the decoder four characters at a
+   time and write whatever each call produces to stdout. */
+int main(void) {
+    char *intext = "aGVsbG8sIHdvcmxkCg==";
+    struct b64_state b64s;
+    char out[80];
+    char *quad;
+    int outlen, n;
+
+    base64_init();
+    b64_start(&b64s);
+
+    quad = intext;
+    while (*quad) {
+        b64_decode(&b64s, quad, 4, out, sizeof(out), &outlen);
+        for (n = 0; n < outlen; n++)
+            putchar(out[n]);
+        quad += 4;
+    }
+
+    b64_stop(&b64s);
+    return 0;
+}
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+/* Base64 decoder */
+void base64_init(void);
+
+struct b64_state {
+ unsigned u;
+ int v;
+};
+
+struct b64_state *b64_start(struct b64_state *);
+void b64_end(struct b64_state *);
+int b64_decode(struct b64_state *, char *, int, char *, int, int *);
+
+/* Quoted-printable decoder */
+
+/* Decoder state carried between qp_decode() calls. */
+struct qp_state {
+ unsigned char c; /* value of the first hex digit of a pending "=XY" escape */
+ int state; /* position in the decoder's state machine; 0 after qp_start() */
+};
+
+/* qp_start() and qp_stop() both clear the state. */
+struct qp_state *qp_start(struct qp_state *);
+void qp_stop(struct qp_state *);
+/* Arguments: state, input, input length, output buffer, output
+   capacity, out-parameter receiving the number of bytes written. */
+int qp_decode(struct qp_state *, char *, int, char *, int, int *);
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "msgproc.h"
+
+/*
+ * Allocate a zero-filled pipeline stage and, when a parent is given,
+ * link it in as the parent's next stage.
+ *
+ * parent: stage the new one follows, or NULL for a stand-alone stage.
+ * Returns the new stage, or NULL if allocation fails; on failure the
+ * parent is left untouched.
+ */
+struct msgproc_stage *
+mps_alloc(struct msgproc_stage *parent)
+{
+    struct msgproc_stage *child = malloc(sizeof(*child));
+
+    if (child == NULL)
+        return NULL;
+    memset(child, 0, sizeof(*child));
+    if (parent != NULL) {
+        /* Link both directions: mps_free() detaches a stage from its
+           predecessor through ->prev. */
+        child->prev = parent;
+        parent->next = child;
+    }
+    return child;
+}
+
+/*
+ void finish_type(struct msgproc_stage *m)
+ {
+ ...clean up
+ free(m->state);
+ mps_free(m);
+ }
+*/
+
+/*
+ * Tear down a stage: finish whatever follows it, detach it from the
+ * stage before it (if any), then release its memory.
+ */
+void
+mps_free(struct msgproc_stage *m)
+{
+    struct msgproc_stage *before;
+
+    mps_finish_next(m);
+    before = m->prev;
+    if (before)
+        before->next = 0;
+    free(m);
+}
+
+/*
+ void process_decoder_type(struct msgproc_stage *m, char *buf, int len)
+ {
+ char tmp[BUFSIZ]; int tmplen;
+ ... process buf/len into tmp/tmplen
+ mps_process_next(m,tmp,tmplen);
+ }
+
+ void process_multipart(struct msgproc_stage *m, char *buf, int len) {
+ char tmp[BUFSIZ]; int tmplen;
+ note: this will automatically skip over plain
+ text prior to boundary
+ do {
+ while (!boundary && remaining input) {
+ copy stuff into tmp
+ mps_process_next(m, tmp, tmplen);
+ }
+ if (boundary) {
+ mps_finish_next(m);
+ tmplen = 0;
+ start_multipart_chunk(m);
+ }
+ } while input remains
+ }
+*/
+
+/*
+ * Pass a buffer on to the stage after m.
+ *
+ * m:   the stage producing the data.
+ * buf: data to hand over; len is its length in bytes.
+ *
+ * Does nothing when there is no following stage or it has no process
+ * handler.  The handler is called with the following stage as its first
+ * argument, so it works on its own state and links rather than on m's.
+ */
+void
+mps_process_next(struct msgproc_stage *m, char *buf, int len)
+{
+    struct msgproc_stage *next = m->next;
+
+    if (next != NULL && next->process != NULL)
+        next->process(next, buf, len);
+}
+
+/*
+ * Finish the stage after m, if there is one and it has a finish
+ * handler.
+ *
+ * The signature matches the prototype in msgproc.h and the call in
+ * mps_free(), both of which use a single argument.  The handler receives
+ * the following stage: finish handlers free their argument, so passing
+ * m would release the caller's own stage instead of the next one.
+ */
+void
+mps_finish_next(struct msgproc_stage *m)
+{
+    struct msgproc_stage *next = m->next;
+
+    if (next != NULL && next->finish != NULL)
+        next->finish(next);
+}
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+struct msgproc_stage;
+
+/* Context shared along a pipeline: a new stage created with a parent
+   gets a copy of the parent's base pointer (see init_mps). */
+struct msgproc_base {
+ void (*callback)(); /* NOTE(review): no caller is visible; presumably invoked with call_data -- confirm */
+ void *call_data;
+ struct msgproc_stage *msgproc; /* NOTE(review): presumably the first stage of the pipeline -- confirm */
+};
+
+/* One stage of a message-processing pipeline.  Stages are chained
+   through prev/next, and each carries the handlers that act on it. */
+struct msgproc_stage {
+ int type; /* NOTE(review): presumably one of the PARSE_xxx / DECODE_xxx constants -- nothing visible sets it */
+ void *state; /* stage-private data */
+ void (*process)(struct msgproc_stage *, char *, int); /* called with a buffer and its length */
+ void (*finish)(struct msgproc_stage *); /* finish frees its argument */
+ struct msgproc_base *base;
+ struct msgproc_stage *prev, *next;
+};
+
+/* <NYI> main interface to the user. could wrap _start further... */
+/* These four are declared only; no definitions exist yet. */
+struct msgproc_base *msgproc_init(void);
+void msgproc_start(struct msgproc_base *, struct msgproc_stage *);
+void msgproc_process(struct msgproc_base *, char *, int);
+void msgproc_finish(struct msgproc_base *);
+/* </NYI> */
+
+/* Stage helpers: allocate a stage after a parent, free a stage, and
+   forward process/finish requests to the stage that follows. */
+struct msgproc_stage *mps_alloc(struct msgproc_stage *);
+void mps_free(struct msgproc_stage *);
+void mps_process_next(struct msgproc_stage *, char *, int);
+void mps_finish_next(struct msgproc_stage *);
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include "msgproc.h"
+#include "parser.h"
+
+/*
+ * Allocate a stage after parent and install its handlers.  The new
+ * stage shares the parent's base when a parent is given.  Returns the
+ * stage, or NULL when mps_alloc() returns NULL.
+ */
+static inline struct msgproc_stage *
+init_mps(struct msgproc_stage *parent,
+    void (*process)(struct msgproc_stage *, char *, int),
+    void (*finish)(struct msgproc_stage *))
+{
+    struct msgproc_stage *stage = mps_alloc(parent);
+
+    if (!stage)
+        return stage;
+    if (parent)
+        stage->base = parent->base;
+    stage->process = process;
+    stage->finish = finish;
+    return stage;
+}
+
+/* Create a stage after parent that is driven by process_text and
+   finish_text; returns the stage, or NULL if it could not be allocated.
+   NOTE(review): parser.h declares finish_text_parser, not finish_text. */
+struct msgproc_stage *
+start_text_parser(struct msgproc_stage *parent)
+{
+ return init_mps(parent, process_text, finish_text);
+}
+
+/* Create a stage after parent that is driven by process_html and
+   finish_html; returns the stage, or NULL if it could not be allocated.
+   NOTE(review): parser.h declares finish_html_parser, not finish_html. */
+struct msgproc_stage *
+start_html_parser(struct msgproc_stage *parent)
+{
+ return init_mps(parent, process_html, finish_html);
+}
+
+/* Create a stage after parent that is driven by process_base64 and
+   finish_base64; returns the stage, or NULL if it could not be allocated.
+   NOTE(review): parser.h declares finish_base64_decoder, not finish_base64. */
+struct msgproc_stage *
+start_base64_decoder(struct msgproc_stage *parent)
+{
+ return init_mps(parent, process_base64, finish_base64);
+}
+
+/* Create a stage after parent that is driven by process_quoted and
+   finish_quoted; returns the stage, or NULL if it could not be allocated.
+   NOTE(review): parser.h declares finish_quoted_decoder, not finish_quoted. */
+struct msgproc_stage *
+start_quoted_decoder(struct msgproc_stage *parent)
+{
+ return init_mps(parent, process_quoted, finish_quoted);
+}
+
+/* Create a stage after parent that is driven by process_multipart and
+   finish_multipart; returns the stage, or NULL if it could not be
+   allocated.
+   NOTE(review): parser.h declares finish_multipart_parser, not
+   finish_multipart. */
+struct msgproc_stage *
+start_multipart_parser(struct msgproc_stage *parent)
+{
+ return init_mps(parent, process_multipart, finish_multipart);
+}
+
+/* Create a stage after parent that is driven by process_chunk and
+   finish_chunk, then give it the defaults for a complete message:
+   content type "text/plain" and encoding "7bit".  Returns the stage.
+   NOTE(review): m is NULL when allocation fails and is still passed to
+   the set_* calls -- confirm they tolerate NULL.
+   NOTE(review): set_contenttype/set_encoding are declared with a single
+   parameter but are called with two here; process_chunk and finish_chunk
+   are not declared in parser.h. */
+struct msgproc_stage *
+start_rfc822_parser(struct msgproc_stage *parent)
+{
+ struct msgproc_stage *m = init_mps(parent, process_chunk, finish_chunk);
+ /* TBD: use the integer types */
+ set_contenttype(m, "text/plain");
+ set_encoding(m, "7bit");
+ return m;
+}
+
+/* Create a stage after parent that is driven by process_chunk and
+   finish_chunk, without the content-type/encoding defaults that
+   start_rfc822_parser applies.  Returns the stage, or NULL if it could
+   not be allocated.
+   NOTE(review): process_chunk and finish_chunk are not declared in
+   parser.h. */
+struct msgproc_stage *
+start_chunk_parser(struct msgproc_stage *parent)
+{
+ return init_mps(parent, process_chunk, finish_chunk);
+}
+
+/* NOTE(review): everything from here to the end of the file repeats
+   declarations that parser.h already provides, and it comes after the
+   functions above that use these names. */
+/* these should probably be static inlines in .c file */
+void set_boundary(struct msgproc_stage *);
+void set_contenttype(struct msgproc_stage *);
+void set_encoding(struct msgproc_stage *);
+void set_disposition(struct msgproc_stage *);
+
+/* Process handlers: stage, input buffer, input length. */
+void process_text(struct msgproc_stage *, char *, int);
+void process_html(struct msgproc_stage *, char *, int);
+void process_base64(struct msgproc_stage *, char *, int);
+void process_quoted(struct msgproc_stage *, char *, int);
+void process_multipart(struct msgproc_stage *, char *, int);
+/* used for rfc822 complete messages and MIME parts; former
+ has defaults for content-type (text/plain) and encoding (7bit) */
+void process_message_chunk(struct msgproc_stage *, char *, int);
+
+/* NOTE(review): these return a stage pointer, while the finish member
+   of struct msgproc_stage is a function returning void. */
+struct msgproc_stage *finish_text_parser(struct msgproc_stage*);
+struct msgproc_stage *finish_html_parser(struct msgproc_stage*);
+struct msgproc_stage *finish_base64_decoder(struct msgproc_stage*);
+struct msgproc_stage *finish_quoted_decoder(struct msgproc_stage*);
+struct msgproc_stage *finish_multipart_parser(struct msgproc_stage*);
+struct msgproc_stage *finish_rfc822_parser(struct msgproc_stage*);
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+/* Identifiers for the kinds of parser and decoder stage.
+   NOTE(review): presumably the values stored in msgproc_stage.type --
+   nothing visible uses them yet. */
+#define PARSE_TEXT 0
+#define PARSE_HTML 1
+#define DECODE_BASE64 2
+#define DECODE_QUOTED 3
+#define PARSE_MULTIPART 4
+#define PARSE_RFC822 5
+
+/* Stage constructors.  Each takes the parent stage (the stage the new
+   one follows) and returns the newly created stage. */
+struct msgproc_stage *start_text_parser(struct msgproc_stage *);
+struct msgproc_stage *start_html_parser(struct msgproc_stage*);
+struct msgproc_stage *start_base64_decoder(struct msgproc_stage*);
+struct msgproc_stage *start_quoted_decoder(struct msgproc_stage*);
+struct msgproc_stage *start_multipart_parser(struct msgproc_stage*);
+struct msgproc_stage *start_rfc822_parser(struct msgproc_stage*);
+/* Defined alongside the others in parser.c; declared here so callers
+   outside that file can use it. */
+struct msgproc_stage *start_chunk_parser(struct msgproc_stage*);
+
+/* these should probably be static inlines in .c file */
+/* NOTE(review): parser.c calls set_contenttype and set_encoding with a
+   second, string argument; these one-parameter prototypes do not allow
+   that. */
+void set_boundary(struct msgproc_stage *);
+void set_contenttype(struct msgproc_stage *);
+void set_encoding(struct msgproc_stage *);
+void set_disposition(struct msgproc_stage *);
+
+/* Process handlers: stage, input buffer, input length. */
+void process_text(struct msgproc_stage *, char *, int);
+void process_html(struct msgproc_stage *, char *, int);
+void process_base64(struct msgproc_stage *, char *, int);
+void process_quoted(struct msgproc_stage *, char *, int);
+void process_multipart(struct msgproc_stage *, char *, int);
+/* used for rfc822 complete messages and MIME parts; former
+ has defaults for content-type (text/plain) and encoding (7bit) */
+void process_message_chunk(struct msgproc_stage *, char *, int);
+
+/* NOTE(review): parser.c installs handlers named finish_text,
+   finish_html, ... and process_chunk/finish_chunk, none of which are
+   declared here; these names and return types differ from them. */
+struct msgproc_stage *finish_text_parser(struct msgproc_stage*);
+struct msgproc_stage *finish_html_parser(struct msgproc_stage*);
+struct msgproc_stage *finish_base64_decoder(struct msgproc_stage*);
+struct msgproc_stage *finish_quoted_decoder(struct msgproc_stage*);
+struct msgproc_stage *finish_multipart_parser(struct msgproc_stage*);
+struct msgproc_stage *finish_rfc822_parser(struct msgproc_stage*);
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include <string.h>
+#include "decoders.h"
+
+/*
+ * Reset a quoted-printable decoder state so a new stream can be
+ * decoded.
+ *
+ * qps: caller-owned state to clear.
+ * Returns qps, so the call can be used inline.
+ */
+struct qp_state *
+qp_start(struct qp_state *qps)
+{
+    /* memset is declared by <string.h>, which this file includes;
+       the legacy bzero is not. */
+    memset(qps, 0, sizeof(*qps));
+    return qps;
+}
+
+/* End a decode stream by clearing the state, exactly as qp_start()
+   does. */
+void
+qp_stop(struct qp_state *qps)
+{
+    bzero(qps, sizeof(*qps));
+}
+
+/*
+ * Value of a hexadecimal digit (either case), or -1 when c is not one.
+ * Returns int rather than char so that -1 is still negative where
+ * plain char is unsigned.
+ */
+static inline int
+hexval(char c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    return -1;
+}
+
+/*
+ * Decode a piece of quoted-printable text, continuing from the state in
+ * qps.
+ *
+ * qps:    decoder state from qp_start(); lets an "=XY" escape or a soft
+ *         line break be split across calls.
+ * s, len: input characters.
+ * out:    output buffer; size is its capacity in bytes.
+ * olen:   set to the number of bytes written to out.
+ *
+ * Returns the number of input characters consumed.  Decoding stops when
+ * the input is used up or out is full.
+ *
+ * States: 0 = plain text, 1 = '=' seen, 2 = "=\r" seen, 3 = '=' and one
+ * hex digit seen, 4 = error recovery after a malformed escape.
+ */
+int
+qp_decode(struct qp_state *qps, char *s, int len,
+    char *out, int size, int *olen)
+{
+    int c;              /* value of the hex digit being examined */
+    int consumed = 0;   /* input characters used so far */
+
+    *olen = 0;
+    for (; len > 0 && size > 0; s++, len--, consumed++) {
+        switch (qps->state) {
+        case 0:
+            if (*s == '=') {
+                qps->state = 1;
+            } else {
+                *out++ = *s;
+                (*olen)++;
+                size--;
+            }
+            break;
+        case 1:
+            if (*s == '\r') {
+                qps->state = 2;     /* soft break, expecting '\n' */
+                break;
+            }
+            if (*s == '\n') {
+                qps->state = 0;     /* bare-LF soft break */
+                break;
+            }
+            c = hexval(*s);
+            if (c < 0) {
+                qps->state = 4;
+                break;
+            }
+            qps->c = (unsigned char)c;
+            qps->state = 3;
+            break;
+        case 2:
+            qps->state = (*s == '\n') ? 0 : 4;
+            break;
+        case 3:
+            c = hexval(*s);
+            if (c < 0) {
+                qps->state = 4;
+                break;
+            }
+            *out++ = (char)((qps->c << 4) | c);
+            (*olen)++;
+            size--;     /* the decoded byte uses output space too */
+            qps->state = 0;
+            break;
+        case 4: /* error state, find next character which
+                   is not '=' and start processing there.
+                   NOTE(review): that character itself is consumed
+                   without being emitted -- confirm this is intended. */
+            if (*s != '=') qps->state = 0;
+            break;
+        default:
+            break;
+        }
+    }
+    return consumed;
+}
+
+#ifdef _UNIT_TEST
+#include <unistd.h>
+/* Sample input whose soft line break ("=\r\n") is split across two
+   chunks. */
+char *chunks[] = {
+    "the quick=20brown=\r",
+    "\nfox=3d jumped over\r\n",
+    0
+};
+/* Self-test: decode the chunks one after another into a single buffer
+   and write the result to stdout. */
+int main(void) {
+    struct qp_state qps;
+    char out[80];
+    int ret, used = 0, produced;
+    int k;
+
+    qp_start(&qps);
+    for (k = 0; chunks[k]; k++) {
+        ret = qp_decode(&qps, chunks[k], strlen(chunks[k]),
+            out + used, sizeof(out) - used, &produced);
+        used += produced;
+    }
+
+    write(1, out, used);
+    return 0;
+}
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+/* Stream-based regular expression matching implementation */
+
+#include <pcre.h>
+#include "re_stream.h"
+
+/* TODO: make these runtime tunables, add upper bound for allocation. */
+/* RALLOC_INIT: size in bytes of the result buffer when it is first
+   allocated.  May be overridden at compile time. */
+#ifndef RALLOC_INIT
+#define RALLOC_INIT 1024
+#endif
+/* RALLOC_INCR: step in bytes for growing the result buffer; defaults to
+   RALLOC_INIT. */
+#ifndef RALLOC_INCR
+#define RALLOC_INCR RALLOC_INIT
+#endif
+
+/*
+ * Prepare a stream matcher for a new run.
+ *
+ * sre:   matcher to set up.  Its result pointer is read, so the
+ *        structure must have been zeroed before the first call; an
+ *        existing result buffer is reused.
+ * re:    compiled pattern to match with.
+ * flags: PCRE options; PCRE_ANCHORED, PCRE_NOTBOL and PCRE_NOTEOL are
+ *        stripped.
+ *
+ * Returns sre, or NULL when the result buffer cannot be allocated.
+ */
+struct stream_re *
+re_stream_start(struct stream_re *sre, pcre *re, int flags)
+{
+    sre->re = re;
+    sre->rlen = 0;
+    sre->flags = flags & ~(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL);
+    sre->state = 0;
+    if (!sre->result) {
+        sre->ralloc = 0;
+        sre->result = malloc(RALLOC_INIT);
+        if (!sre->result) return NULL;
+        sre->ralloc = RALLOC_INIT;
+    }
+    /* Empty the result even when the buffer is reused, so text from an
+       earlier run is not reported as the current result. */
+    sre->result[0] = 0;
+    return sre;
+}
+
+/*
+ * Release a stream matcher: frees the result buffer and zeroes the
+ * whole structure, so it can be handed to re_stream_start() again.
+ */
+void
+re_stream_stop(struct stream_re *sre)
+{
+    /* No re_stream_start() call here: it could allocate a buffer whose
+       only fate is to be freed on the next line. */
+    free(sre->result);
+    bzero(sre, sizeof(*sre));
+}
+
+/* Returns the matcher's internal result buffer.  The caller must make a
+   copy of the result if they wish to use it: the buffer belongs to sre,
+   and future calls may alter, move or free it. */
+char *
+re_stream_getresult(struct stream_re *sre)
+{
+ return sre->result;
+}
+
+/* Reports the state left by the most recent re_stream_start() or
+   re_stream_exec() call.
+   returns 0 => no match, -1 => partial match, 1 => complete match */
+int
+re_stream_result(struct stream_re *sre)
+{
+ return sre->state;
+}
+
+/* TODO: rewrite below for above interface. Much simpler to use. */
+/*
+ * Run the matcher over the next piece of the input stream.
+ *
+ * sre:       matcher set up by re_stream_start().
+ * sub, slen: the next piece of input and its length.
+ *
+ * If the previous call ended in a partial match, matching resumes from
+ * it; otherwise a fresh match is attempted.  Matched text is appended to
+ * the result buffer (a fresh match starts the buffer over), and
+ * re_stream_result() then reports -1 for a partial match reaching the
+ * end of sub, 1 for a match, or 0 for no match.
+ *
+ * Returns the offset in sub where the match ended, slen when nothing
+ * matched, or -1 if the result buffer could not be grown.
+ */
+int
+re_stream_exec(struct stream_re *sre, char *sub, int slen)
+{
+    int match, flags, newlen, want, end = slen;
+    int restart = (sre->state == -1);
+
+    flags = PCRE_PARTIAL;
+    if (restart)
+        flags |= PCRE_DFA_RESTART;
+    /* Assume no match until one is found; a continuation that fails
+       must not be left looking like a complete match. */
+    sre->state = 0;
+
+    /* PCRE wants the vector sizes as element counts, not bytes. */
+    match = pcre_dfa_exec(sre->re, 0, sub, slen, 0, sre->flags | flags,
+        sre->ovec, (int)(sizeof(sre->ovec) / sizeof(sre->ovec[0])),
+        sre->wspace, (int)(sizeof(sre->wspace) / sizeof(sre->wspace[0])));
+
+    if (match > 0 || match == PCRE_ERROR_PARTIAL) {
+        end = sre->ovec[1];
+        if (!restart)
+            sre->rlen = 0;  /* new match: do not append to the old one */
+        newlen = sre->rlen + sre->ovec[1] - sre->ovec[0];
+        if (newlen >= sre->ralloc) {
+            /* Grow until the text plus its terminating NUL fits. */
+            want = sre->ralloc;
+            while (newlen >= want)
+                want += RALLOC_INCR;
+            sre->result = reallocf(sre->result, want);
+            if (!sre->result) {
+                sre->ralloc = 0;
+                sre->rlen = 0;
+                return -1;
+            }
+            sre->ralloc = want;
+        }
+        /* Only the first offset pair is used, hence a string count
+           of 1. */
+        pcre_copy_substring(sub, sre->ovec, 1,
+            0, sre->result + sre->rlen,
+            sre->ralloc - sre->rlen);
+        sre->rlen = newlen;
+
+        if (sre->ovec[1] == slen && match == PCRE_ERROR_PARTIAL)
+            sre->state = -1;
+        else
+            sre->state = 1;
+    }
+    return end;
+}
+
+#ifdef _UNIT_TEST
+#include <stdio.h>
+
+char *pat = "asdf+g";
+
+/* Input in which the text matching the pattern is split across four
+   chunks. */
+char *chunks[] = {"this is a",
+    "sd",
+    "ffffff",
+    "ghij",
+    0};
+
+/* Self-test: run the matcher over the chunks, print the progress of
+   each call, then print the final result. */
+int main(void) {
+    char **s = chunks;
+    const char *etxt;
+    int epos;
+    pcre *re;
+    /* Zeroed because re_stream_start() reads the result pointer. */
+    struct stream_re sr = {0};
+    int r;
+
+    re = pcre_compile(pat, 0, &etxt, &epos, 0);
+    if (!re) {
+        printf("pattern error at %d: %s\n", epos, etxt);
+        return 1;
+    }
+    if (!re_stream_start(&sr,re,0)) {
+        puts("out of memory");
+        return 1;
+    }
+
+    for (s = chunks; *s; s++) {
+        int off = 0;
+        do {
+            r = re_stream_exec(&sr, *s + off, strlen(*s) - off);
+            if (r <= 0)
+                break;  /* error or no progress: do not spin */
+            printf("input = %s, processed %d, m=%d,"
+                " current match %s\n", *s + off, r,
+                re_stream_result(&sr),
+                re_stream_getresult(&sr));
+            off += r;
+        } while (off < strlen(*s));
+        if (r < 0) {
+            puts("out of memory");
+            return 1;
+        }
+    }
+    puts(re_stream_getresult(&sr));
+    re_stream_stop(&sr);
+
+    return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+/* Stream-based regexp matching layer */
+/* State of one matcher; set up with re_stream_start().  Must be zeroed
+   before first use because re_stream_start() reads the result pointer. */
+struct stream_re {
+ pcre *re; /* compiled pattern */
+ int rlen, ralloc, flags, state; /* result length, result buffer size, PCRE options, last match state */
+ char *result; /* heap buffer holding the matched text */
+ int ovec[30], wspace[50]; /* offset vector and workspace handed to pcre_dfa_exec */
+};
+
+/* Prototypes matching the definitions in the implementation file. */
+struct stream_re *re_stream_start(struct stream_re *, pcre *, int);
+void re_stream_stop(struct stream_re *);
+/* 0 => no match, -1 => partial match, 1 => complete match */
+int re_stream_result(struct stream_re *);
+/* Internal buffer; copy the text if it has to outlive later calls. */
+char *re_stream_getresult(struct stream_re *);
+/* Arguments: matcher, next piece of input, its length. */
+int re_stream_exec(struct stream_re *, char *, int);