From 99b9f4b618704a0b22eba903f9510f16b666a511 Mon Sep 17 00:00:00 2001 From: chris mikkelson Date: Tue, 9 Mar 2010 23:11:27 -0600 Subject: [PATCH] Mostly complete and working msgproc_message module. --- message.c | 130 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 50 deletions(-) diff --git a/message.c b/message.c index be32fec..7e28acc 100644 --- a/message.c +++ b/message.c @@ -4,19 +4,20 @@ */ #include +#include #include #include #include "msgproc.h" -static const char *ctype_pat = "^Content-Type:.*?(^\\S|\\Z)"; +static const char *ctype_pat = "^Content-Type:.*?(?=^\\S|\\Z)"; static pcre *ctype_re; -static const char *cxfer_pat = "^Content-Tranfer-Encoding:.*?(^\\S|\\Z)"; +static const char *cxfer_pat = "^Content-Transfer-Encoding:.*?(?=^\\S|\\Z)"; static pcre *cxfer_re; static const char *type_pat = "text/(plain|html)|(multipart)/"; static pcre *type_re; static const char *enc_pat = "7bit|8bit|base64|quoted-printable"; static pcre *enc_re; -static const char *bound_pat = "boundary\\s*=\\s*\"?([\\da-z'()+,-./:=? \r\n])\"?"; +static const char *bound_pat = "boundary\\s*=\\s*\"?([\\da-z'()+,-./:=?_ \r\n]+)\"?"; static pcre *bound_re; static void @@ -40,7 +41,27 @@ message_init(void) static msgproc_module *text_module = &msgproc_text, *html_module = &msgproc_html, - *multipart_module = &msgproc_multipart; + *multipart_module = &msgproc_multipart, + *quoted_module = &msgproc_quoted, + *base64_module = &msgproc_base64; + +#define MESSAGE_MODULE_TEXT 0 +#define MESSAGE_MODULE_HTML 1 +#define MESSAGE_MODULE_MULTIPART 2 +#define MESSAGE_MODULE_QUOTED 3 +#define MESSAGE_MODULE_BASE64 4 + +static void +message_module_set(int type, void *data, size_t size) +{ + switch(type) { + case MESSAGE_MODULE_TEXT: text_module = data; break; + case MESSAGE_MODULE_HTML: html_module = data; break; + case MESSAGE_MODULE_MULTIPART: multipart_module = data; break; + case MESSAGE_MODULE_QUOTED: quoted_module = data; break; + case MESSAGE_MODULE_BASE64: base64_module = data; break; + } +} #define HDRALLOC 512 #define HDRINCR HDRALLOC @@ -48,6 +69,8 @@ static msgproc_module *text_module = &msgproc_text, struct message_state { char *header; int hdrlen, hdralloc; + msgproc_module *type, *encoding; + char *boundary; enum { START = 0, CR, NL, @@ -67,6 +90,7 @@ message_start(msgproc *m) free(ms); ms = 0; } + ms->type = text_module; } msgproc_setpriv(m, (void*)ms); } @@ -97,21 +121,20 @@ append_hdr(char c, struct message_state *ms) } static int -parse_cxfer(msgproc *m, char *h, size_t hlen) +parse_cxfer(struct message_state *ms, char *h, size_t hlen) { int match, res = 1; int ovec[30]; - msgproc *next; match = pcre_exec(enc_re, 0, h, hlen, 0, 0, ovec, 30); if (match > 0) { char *enc; - pcre_get_substring(h, ovec, match, 1, (const char **)&enc); + pcre_get_substring(h, ovec, match, 0, (const char **)&enc); if (!enc) res = 0; if (!strcasecmp(enc, "quoted-printable")) { - next = msgproc_create(m, &msgproc_quoted); + ms->encoding = quoted_module; } else if (!strcasecmp(enc, "base64")) { - next = msgproc_create(m, &msgproc_base64); + ms->encoding = base64_module; } else { res = 0; } @@ -122,46 +145,46 @@ parse_cxfer(msgproc *m, char *h, size_t hlen) return res; } +static inline void +unfold(char *s) { + char *t = s; + do { + if (*t == '\r') t++; + if (*t == '\n') { + t++; + if (*t && isspace(*t)) t++; + } + } while ((*s++ = *t++)); +} + static int -parse_ctype(msgproc *m, char *h, size_t hlen) +parse_ctype(struct message_state *ms, char *h, size_t hlen) { int match, res = 1; int ovec[30]; - msgproc *next, *parent; - - next = msgproc_next(m); - if (next) { - parent = next; - } else { - parent = m; - } match = pcre_exec(type_re, 0, h, hlen, 0, 0, ovec, 30); if (match > 0) { char *type; - pcre_get_substring(h, ovec, match, 1, (const char **)&type); + pcre_get_substring(h, ovec, match, match - 1, (const char **)&type); if (!type) res = 0; if (!strcasecmp(type, "plain")) { - msgproc_create(parent, text_module); + ms->type = text_module; } else if (!strcasecmp(type, "html")) { - msgproc_create(parent, html_module); + ms->type = html_module; } else if (!strcasecmp(type, "multipart")) { - /* XXX -- unlike text/plain and text/html, - multipart parts cannot be encoded. - Remove any previously-established - encoding */ - next = msgproc_next(m); - if (next) msgproc_finish(next); - + if (ms->encoding) { + free(type); + return 0; + } + ms->type = multipart_module; match = pcre_exec(bound_re, 0, h, hlen, 0, 0, ovec, 30); if (match > 0) { char *b; - pcre_get_substring(h, ovec, match, 1, (const char **)&b); - next = msgproc_create(m, multipart_module); - msgproc_start(next); - msgproc_set(next, 1, b, strlen(b)); - free(h); - res = 2; + pcre_get_substring(h, ovec, match, 1, + (const char **)&b); + unfold(b); + ms->boundary = b; } } else { res = 0; @@ -179,7 +202,7 @@ message_process(msgproc *m, char *buf, size_t len) struct message_state *ms = msgproc_getpriv(m); char *s, *h; int res, match, ovec[30]; - msgproc *next = 0; + msgproc *enc, *type, *next; if (!ms) return; if (!ms->header) return; @@ -203,7 +226,8 @@ message_process(msgproc *m, char *buf, size_t len) break; case NL: if (*s == '\r') ms->state = CR2; - if (*s == '\n') ms->state = BODY; + else if (*s == '\n') ms->state = BODY; + else ms->state = START; break; case CR2: if (*s == '\n') ms->state = BODY; @@ -221,7 +245,7 @@ message_process(msgproc *m, char *buf, size_t len) ovec, 30); if (match > 0) { pcre_get_substring(ms->header, ovec, match, 0, (const char **)&h); - res = parse_cxfer(m, h, strlen(h)); + res = parse_cxfer(ms, h, strlen(h)); free(h); if (!res) return; } @@ -230,28 +254,34 @@ message_process(msgproc *m, char *buf, size_t len) ovec, 30); if (match > 0) { pcre_get_substring(ms->header, ovec, match, 0, (const char **)&h); - res = parse_ctype(m, h, strlen(h)); + res = parse_ctype(ms, h, strlen(h)); free(h); if (!res) return; - if (res == 2) continue; /* XXX -- avoid a double - start of multipart downstream, - which would clobber the - boundary value. Better fix - would be to include a state - in the core msgproc struct - to prevent double starts. */ } - next = msgproc_next(m); - assert(next); - msgproc_start(next); + if (ms->encoding) { + enc = msgproc_create(m, ms->encoding); + msgproc_start(enc); + } else { + enc = m; + } + + type = msgproc_create(enc, ms->type); + if (ms->type->mpm_type == MSGPROC_MULTIPART) { + assert(ms->boundary); + msgproc_start(type); + msgproc_set(type, 1, ms->boundary, strlen(ms->boundary)); + free(ms->boundary); + } else { + msgproc_start(type); + } } } msgproc_module msgproc_message = { MSGPROC_MESSAGE, /* type */ message_init, /* module init */ - NULL, /* set module parameter */ + message_module_set, /* set module parameter */ message_start, /* start module instance */ NULL, /* set module instance parameter */ message_process, /* process data */ @@ -262,7 +292,7 @@ msgproc_module msgproc_message = { msgproc_module msgproc_part = { MSGPROC_PART, /* type */ message_init, - NULL, /* set module parameter */ + message_module_set, /* set module parameter */ message_start, /* start module instance */ NULL, /* set module instance parameter */ message_process, /* process data */ -- 2.50.1