--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pcre.h>
+#include "msgproc.h"
+
+/*
+ * Header-parsing patterns and their compiled forms (filled in by
+ * message_init()).
+ *
+ * ctype_pat and cxfer_pat are compiled with PCRE_MULTILINE | PCRE_DOTALL |
+ * PCRE_CASELESS.  Each lazily matches from the field name through the first
+ * character of the next line that starts with a non-space character, or to
+ * the end of the subject, so folded continuation lines stay in the match.
+ */
+static const char *ctype_pat = "^Content-Type:.*?(^\\S|\\Z)";
+static pcre *ctype_re;
+/* Field name must be spelled "Transfer" to match real headers. */
+static const char *cxfer_pat = "^Content-Transfer-Encoding:.*?(^\\S|\\Z)";
+static pcre *cxfer_re;
+/* text/plain and text/html are captured in group 1, multipart in group 2. */
+static const char *type_pat = "text/(plain|html)|(multipart)/";
+static pcre *type_re;
+/* Group 1 holds the encoding token, so callers reading group 1 get a value. */
+static const char *enc_pat = "(7bit|8bit|base64|quoted-printable)";
+static pcre *enc_re;
+/*
+ * Group 1 holds the whole boundary value: any run of boundary characters
+ * (including '_' and embedded spaces) that ends in a non-space character.
+ * CR and LF are excluded so an unquoted value cannot run on into the
+ * following header line.
+ */
+static const char *bound_pat = "boundary\\s*=\\s*\"?([\\da-z'()+_,\\-./:=? ]*[\\da-z'()+_,\\-./:=?])\"?";
+static pcre *bound_re;
+
+/*
+ * One-time module initialisation: compile every header-parsing pattern.
+ *
+ * Calls after the first return immediately.  Compile results are not
+ * checked: a pattern that fails to compile leaves its handle NULL and the
+ * error text/offset reported by pcre_compile() are discarded.
+ */
+static void
+message_init(void)
+{
+    static int initialized = 0;
+    /* Whole-header patterns need '^' per line and '.' spanning newlines. */
+    const int header_flags = PCRE_MULTILINE | PCRE_DOTALL | PCRE_CASELESS;
+    /* Value patterns only need case-insensitivity. */
+    const int value_flags = PCRE_CASELESS;
+    const char *errmsg;
+    int erroff;
+    unsigned i;
+
+    if (initialized)
+        return;
+    initialized = 1;
+
+    {
+        struct {
+            pcre **slot;
+            const char *pattern;
+            int flags;
+        } jobs[] = {
+            { &ctype_re, ctype_pat, header_flags },
+            { &cxfer_re, cxfer_pat, header_flags },
+            { &type_re,  type_pat,  value_flags  },
+            { &enc_re,   enc_pat,   value_flags  },
+            { &bound_re, bound_pat, value_flags  },
+        };
+
+        for (i = 0; i < sizeof jobs / sizeof jobs[0]; i++) {
+            *jobs[i].slot = pcre_compile(jobs[i].pattern, jobs[i].flags,
+                                         &errmsg, &erroff, 0);
+        }
+    }
+}
+
+/* Downstream handlers that parse_ctype() installs for each recognised type. */
+static msgproc_module *text_module = &msgproc_text;
+static msgproc_module *html_module = &msgproc_html;
+static msgproc_module *multipart_module = &msgproc_multipart;
+
+/* Size in bytes of the header buffer allocated by message_start(). */
+#define HDRALLOC 512
+/* Number of bytes append_hdr() adds to the buffer each time it fills up. */
+#define HDRINCR HDRALLOC
+
+/*
+ * Per-instance state: the raw header bytes collected so far plus the
+ * position of the end-of-headers scanner in message_process().
+ */
+struct message_state {
+    char *header;   /* accumulated header bytes; not NUL-terminated */
+    int hdrlen;     /* number of bytes stored in header */
+    int hdralloc;   /* buffer capacity as tracked by append_hdr() */
+    enum {
+        START = 0,  /* no line ending pending */
+        CR,         /* last byte was '\r' */
+        NL,         /* a line ending has just been seen */
+        CR2,        /* '\r' seen directly after a line ending */
+        BODY        /* blank line seen: headers are complete */
+    } state;
+};
+
+/*
+ * Start a module instance: allocate zeroed per-instance state together
+ * with an initial header buffer of HDRALLOC bytes and attach it to m.
+ *
+ * If either allocation fails, NULL is stored as the private pointer;
+ * message_process() and message_finish() both tolerate that.
+ */
+static void
+message_start(msgproc *m)
+{
+    /* calloc leaves hdrlen at 0 and state at START (== 0). */
+    struct message_state *ms = calloc(1, sizeof *ms);
+
+    if (ms) {
+        ms->header = malloc(HDRALLOC);
+        if (ms->header) {
+            /* Record the real capacity so the first append does not
+               trigger a needless reallocation. */
+            ms->hdralloc = HDRALLOC;
+        } else {
+            free(ms);
+            ms = NULL;
+        }
+    }
+    msgproc_setpriv(m, (void *)ms);
+}
+
+/*
+ * Shut down a module instance: release the header buffer and the private
+ * state (if any was attached), then hand m to msgproc_free().
+ */
+static void
+message_finish(msgproc *m)
+{
+    struct message_state *state = msgproc_getpriv(m);
+
+    if (state != NULL) {
+        free(state->header);    /* free(NULL) is a no-op */
+        free(state);
+    }
+    msgproc_free(m);
+}
+
+/*
+ * Append one byte to the header buffer, growing it by HDRINCR when full.
+ *
+ * Returns the (possibly moved) buffer, or NULL if the buffer is gone.
+ * When growth fails, or would overflow the int capacity counter, the
+ * buffer is released and the bookkeeping reset, so ms->header == NULL
+ * reliably means no header data is available.
+ */
+static inline char *
+append_hdr(char c, struct message_state *ms)
+{
+    if (!ms->header)
+        return NULL;
+
+    if (ms->hdrlen >= ms->hdralloc) {
+        char *grown = NULL;
+
+        /* Plain realloc keeps the old block on failure; free it ourselves
+           rather than relying on the BSD-only reallocf(). */
+        if (ms->hdralloc <= INT_MAX - HDRINCR)
+            grown = realloc(ms->header, (size_t)ms->hdralloc + HDRINCR);
+        if (!grown) {
+            free(ms->header);
+            ms->header = NULL;
+            ms->hdrlen = 0;
+            ms->hdralloc = 0;
+            return NULL;
+        }
+        ms->header = grown;
+        ms->hdralloc += HDRINCR;
+    }
+
+    ms->header[ms->hdrlen++] = c;
+    return ms->header;
+}
+
+/*
+ * Inspect a Content-Transfer-Encoding header and install the matching
+ * decoder after m.
+ *
+ *   m     processor that receives the decoder
+ *   h     text of the header field
+ *   hlen  length of h in bytes
+ *
+ * Returns 1 when processing may continue: a quoted-printable or base64
+ * decoder was requested, or the encoding is 7bit/8bit and needs none.
+ * Returns 0 when no known encoding token is found or the token cannot be
+ * extracted.
+ */
+static int
+parse_cxfer(msgproc *m, char *h, size_t hlen)
+{
+    int ovec[30];
+    char *enc = NULL;
+    int match;
+
+    match = pcre_exec(enc_re, 0, h, (int)hlen, 0, 0, ovec, 30);
+    if (match <= 0)
+        return 0;
+
+    /* Group 0 (the whole match) is the encoding token and exists whether
+       or not the pattern carries a capture group. */
+    if (pcre_get_substring(h, ovec, match, 0, (const char **)&enc) < 0 || !enc)
+        return 0;
+
+    if (!strcasecmp(enc, "quoted-printable")) {
+        msgproc_create(m, &msgproc_quoted);
+    } else if (!strcasecmp(enc, "base64")) {
+        msgproc_create(m, &msgproc_base64);
+    }
+    /* else: 7bit/8bit are identity encodings.  No decoder is needed, and
+       reporting failure here would make the caller drop the message. */
+
+    pcre_free_substring(enc);
+    return 1;
+}
+
+/*
+ * Inspect a Content-Type header and install the matching handler.
+ *
+ *   m     processor whose headers are being parsed
+ *   h     text of the header field; owned by the caller, never freed here
+ *   hlen  length of h in bytes
+ *
+ * Returns
+ *   1  a text/plain or text/html handler was requested; the caller should
+ *      start the chain;
+ *   2  a multipart handler was created, started and given its boundary;
+ *      the caller must not start it again;
+ *   0  the type is not recognised, a substring could not be extracted, or
+ *      a multipart type carries no usable boundary.
+ */
+static int
+parse_ctype(msgproc *m, char *h, size_t hlen)
+{
+    int ovec[30];
+    char *type = NULL;
+    msgproc *next, *parent;
+    int match, res = 1;
+
+    /* If something is already chained after m (the transfer decoder),
+       text handlers go after it; otherwise directly after m. */
+    next = msgproc_next(m);
+    parent = next ? next : m;
+
+    match = pcre_exec(type_re, 0, h, (int)hlen, 0, 0, ovec, 30);
+    if (match < 2)
+        return 0;
+
+    /* The alternatives of the type pattern capture into different groups;
+       the highest-numbered group that was set (match - 1) holds the name. */
+    if (pcre_get_substring(h, ovec, match, match - 1,
+                           (const char **)&type) < 0 || !type)
+        return 0;
+
+    if (!strcasecmp(type, "plain")) {
+        msgproc_create(parent, text_module);
+    } else if (!strcasecmp(type, "html")) {
+        msgproc_create(parent, html_module);
+    } else if (!strcasecmp(type, "multipart")) {
+        char *b = NULL;
+
+        /* XXX -- unlike text/plain and text/html,
+           multipart parts cannot be encoded.
+           Remove any previously-established
+           encoding */
+        next = msgproc_next(m);
+        if (next) msgproc_finish(next);
+
+        /* Without a boundary no handler is installed, so report failure
+           rather than letting the caller start a missing handler. */
+        res = 0;
+        match = pcre_exec(bound_re, 0, h, (int)hlen, 0, 0, ovec, 30);
+        if (match > 1 &&
+            pcre_get_substring(h, ovec, match, 1, (const char **)&b) >= 0 &&
+            b) {
+            next = msgproc_create(m, multipart_module);
+            msgproc_start(next);
+            msgproc_set(next, 1, b, strlen(b));
+            /* Release the boundary copy, not the caller-owned header h.
+               NOTE(review): assumes msgproc_set() copies the value
+               rather than retaining the pointer -- confirm. */
+            pcre_free_substring(b);
+            res = 2;
+        }
+    } else {
+        res = 0;
+    }
+
+    pcre_free_substring(type);
+    return res;
+}
+
+/*
+ * Feed a chunk of message data through the header parser.
+ *
+ * Bytes are accumulated into the header buffer until a blank line (LF LF
+ * or CRLF CRLF) ends the header block.  At that point the saved headers
+ * are searched for Content-Transfer-Encoding and Content-Type, the
+ * matching handlers are installed, and the chain is started.  Every byte
+ * after the blank line, in this and later calls, is forwarded to the next
+ * processor if there is one.
+ *
+ * Returns early, installing nothing further, when state or buffer is
+ * missing, when the buffer cannot grow, or when a header is present but
+ * cannot be handled.
+ */
+static void
+message_process(msgproc *m, char *buf, size_t len)
+{
+    struct message_state *ms = msgproc_getpriv(m);
+    char *s, *h;
+    int res, match, ovec[30];
+    msgproc *next = 0;
+
+    if (!ms) return;
+    if (!ms->header) return;
+
+    for (s = buf; len > 0; s++, len--) {
+        if (ms->state == BODY) {
+            /* Headers are done: pass the remainder downstream. */
+            next = msgproc_next(m);
+            if (next)
+                msgproc_process(next, s, len);
+            return;
+        }
+
+        /* Out of memory: the header buffer is gone, nothing to parse. */
+        if (!append_hdr(*s, ms)) return;
+
+        switch (ms->state) {
+        case START:
+            if (*s == '\r') ms->state = CR;
+            if (*s == '\n') ms->state = NL;
+            break;
+        case CR:
+            if (*s == '\n') ms->state = NL;
+            else ms->state = START;
+            break;
+        case NL:
+            if (*s == '\r') ms->state = CR2;
+            else if (*s == '\n') ms->state = BODY;
+            /* Any other byte begins a new header line.  Without this
+               reset the newline ending that line would be mistaken for
+               the blank line that ends the headers. */
+            else ms->state = START;
+            break;
+        case CR2:
+            if (*s == '\n') ms->state = BODY;
+            else ms->state = START;
+            break;
+        case BODY:
+            break;
+        }
+        if (ms->state != BODY) continue;
+
+        /* parse saved headers, look for non-default Content-Type
+           and Content-Transfer-Encoding values. */
+
+        match = pcre_exec(cxfer_re, 0, ms->header, ms->hdrlen, 0, 0,
+                          ovec, 30);
+        if (match > 0) {
+            if (pcre_get_substring(ms->header, ovec, match, 0,
+                                   (const char **)&h) < 0)
+                return;
+            res = parse_cxfer(m, h, strlen(h));
+            pcre_free_substring(h);
+            if (!res) return;
+        }
+
+        match = pcre_exec(ctype_re, 0, ms->header, ms->hdrlen, 0, 0,
+                          ovec, 30);
+        if (match > 0) {
+            if (pcre_get_substring(ms->header, ovec, match, 0,
+                                   (const char **)&h) < 0)
+                return;
+            res = parse_ctype(m, h, strlen(h));
+            pcre_free_substring(h);
+            if (!res) return;
+            if (res == 2) continue; /* XXX -- avoid a double
+                                       start of multipart downstream,
+                                       which would clobber the
+                                       boundary value. Better fix
+                                       would be to include a state
+                                       in the core msgproc struct
+                                       to prevent double starts. */
+        }
+
+        /* NOTE(review): if neither header installed a handler and none
+           was chained beforehand, msgproc_next() presumably returns NULL
+           and this assert fires -- confirm a default handler exists. */
+        next = msgproc_next(m);
+        assert(next);
+        msgproc_start(next);
+    }
+}
+
+/*
+ * Module descriptor registered under type MSGPROC_MESSAGE.  It uses the
+ * same init/start/process/finish callbacks as msgproc_part; only the type
+ * tag differs.
+ */
+msgproc_module msgproc_message = {
+ MSGPROC_MESSAGE, /* type */
+ message_init, /* module init */
+ NULL, /* set module parameter */
+ message_start, /* start module instance */
+ NULL, /* set module instance parameter */
+ message_process, /* process data */
+ message_finish, /* shut down, free module instance */
+ NULL /* shut down, free module */
+};
+
+/*
+ * Module descriptor registered under type MSGPROC_PART.  It uses the same
+ * init/start/process/finish callbacks as msgproc_message; only the type
+ * tag differs.
+ */
+msgproc_module msgproc_part = {
+ MSGPROC_PART, /* type */
+ message_init, /* module init */
+ NULL, /* set module parameter */
+ message_start, /* start module instance */
+ NULL, /* set module instance parameter */
+ message_process, /* process data */
+ message_finish, /* shut down, free module instance */
+ NULL /* shut down, free module */
+};