*/
#include <string.h>
+#include <ctype.h>
#include <assert.h>
#include <pcre.h>
#include "msgproc.h"
-static const char *ctype_pat = "^Content-Type:.*?(^\\S|\\Z)";
+static const char *ctype_pat = "^Content-Type:.*?(?=^\\S|\\Z)";
static pcre *ctype_re;
-static const char *cxfer_pat = "^Content-Tranfer-Encoding:.*?(^\\S|\\Z)";
+static const char *cxfer_pat = "^Content-Transfer-Encoding:.*?(?=^\\S|\\Z)";
static pcre *cxfer_re;
static const char *type_pat = "text/(plain|html)|(multipart)/";
static pcre *type_re;
static const char *enc_pat = "7bit|8bit|base64|quoted-printable";
static pcre *enc_re;
-static const char *bound_pat = "boundary\\s*=\\s*\"?([\\da-z'()+,-./:=? \r\n])\"?";
+static const char *bound_pat = "boundary\\s*=\\s*\"?([\\da-z'()+,-./:=?_ \r\n]+)\"?";
static pcre *bound_re;
static void
static msgproc_module *text_module = &msgproc_text,
*html_module = &msgproc_html,
- *multipart_module = &msgproc_multipart;
+ *multipart_module = &msgproc_multipart,
+ *quoted_module = &msgproc_quoted,
+ *base64_module = &msgproc_base64;
+
+#define MESSAGE_MODULE_TEXT 0
+#define MESSAGE_MODULE_HTML 1
+#define MESSAGE_MODULE_MULTIPART 2
+#define MESSAGE_MODULE_QUOTED 3
+#define MESSAGE_MODULE_BASE64 4
+
+/*
+ * Module-level parameter setter.
+ *
+ * Repoints one of this file's handler-module pointers at `data`.
+ * `type` picks the pointer through the MESSAGE_MODULE_* constants;
+ * any other value leaves every pointer untouched.  `data` is stored
+ * as-is with no validation (assumed to be a msgproc_module pointer --
+ * confirm against callers).  `size` is accepted but not used.
+ */
+static void
+message_module_set(int type, void *data, size_t size)
+{
+    (void)size;
+
+    if (type == MESSAGE_MODULE_TEXT) {
+        text_module = data;
+    } else if (type == MESSAGE_MODULE_HTML) {
+        html_module = data;
+    } else if (type == MESSAGE_MODULE_MULTIPART) {
+        multipart_module = data;
+    } else if (type == MESSAGE_MODULE_QUOTED) {
+        quoted_module = data;
+    } else if (type == MESSAGE_MODULE_BASE64) {
+        base64_module = data;
+    }
+}
#define HDRALLOC 512
#define HDRINCR HDRALLOC
struct message_state {
char *header;
int hdrlen, hdralloc;
+ msgproc_module *type, *encoding;
+ char *boundary;
enum {
START = 0,
CR, NL,
free(ms);
ms = 0;
}
+ ms->type = text_module;
}
msgproc_setpriv(m, (void*)ms);
}
}
static int
-parse_cxfer(msgproc *m, char *h, size_t hlen)
+parse_cxfer(struct message_state *ms, char *h, size_t hlen)
{
int match, res = 1;
int ovec[30];
- msgproc *next;
match = pcre_exec(enc_re, 0, h, hlen, 0, 0, ovec, 30);
if (match > 0) {
char *enc;
- pcre_get_substring(h, ovec, match, 1, (const char **)&enc);
+ pcre_get_substring(h, ovec, match, 0, (const char **)&enc);
if (!enc) res = 0;
if (!strcasecmp(enc, "quoted-printable")) {
- next = msgproc_create(m, &msgproc_quoted);
+ ms->encoding = quoted_module;
} else if (!strcasecmp(enc, "base64")) {
- next = msgproc_create(m, &msgproc_base64);
+ ms->encoding = base64_module;
} else {
res = 0;
}
return res;
}
+/*
+ * Remove folded line breaks from the NUL-terminated string `s`, in place.
+ *
+ * Before each byte is copied, one CR is skipped if present; then, if the
+ * read position is at LF, that LF and at most one following whitespace
+ * character are skipped as well.  The result is never longer than the
+ * input and is always NUL-terminated.  A NULL `s` is a no-op.
+ *
+ * Only one break sequence is consumed per copied byte, so back-to-back
+ * breaks (e.g. "\r\n\r\n") can leave a CR or LF in the output.
+ */
+static inline void
+unfold(char *s) {
+    char *t = s;
+
+    if (!s) return;
+
+    do {
+        if (*t == '\r') t++;
+        if (*t == '\n') {
+            t++;
+            /* <ctype.h> functions need an unsigned char value:
+               a negative plain char is undefined behaviour. */
+            if (*t && isspace((unsigned char)*t)) t++;
+        }
+    } while ((*s++ = *t++)); /* copy, stopping after the terminator */
+}
+
static int
-parse_ctype(msgproc *m, char *h, size_t hlen)
+parse_ctype(struct message_state *ms, char *h, size_t hlen)
{
int match, res = 1;
int ovec[30];
- msgproc *next, *parent;
-
- next = msgproc_next(m);
- if (next) {
- parent = next;
- } else {
- parent = m;
- }
match = pcre_exec(type_re, 0, h, hlen, 0, 0, ovec, 30);
if (match > 0) {
char *type;
- pcre_get_substring(h, ovec, match, 1, (const char **)&type);
+ pcre_get_substring(h, ovec, match, match - 1, (const char **)&type);
if (!type) res = 0;
if (!strcasecmp(type, "plain")) {
- msgproc_create(parent, text_module);
+ ms->type = text_module;
} else if (!strcasecmp(type, "html")) {
- msgproc_create(parent, html_module);
+ ms->type = html_module;
} else if (!strcasecmp(type, "multipart")) {
- /* XXX -- unlike text/plain and text/html,
- multipart parts cannot be encoded.
- Remove any previously-established
- encoding */
- next = msgproc_next(m);
- if (next) msgproc_finish(next);
-
+ if (ms->encoding) {
+ free(type);
+ return 0;
+ }
+ ms->type = multipart_module;
match = pcre_exec(bound_re, 0, h, hlen, 0, 0, ovec, 30);
if (match > 0) {
char *b;
- pcre_get_substring(h, ovec, match, 1, (const char **)&b);
- next = msgproc_create(m, multipart_module);
- msgproc_start(next);
- msgproc_set(next, 1, b, strlen(b));
- free(h);
- res = 2;
+ pcre_get_substring(h, ovec, match, 1,
+ (const char **)&b);
+ unfold(b);
+ ms->boundary = b;
}
} else {
res = 0;
struct message_state *ms = msgproc_getpriv(m);
char *s, *h;
int res, match, ovec[30];
- msgproc *next = 0;
+ msgproc *enc, *type, *next;
if (!ms) return;
if (!ms->header) return;
break;
case NL:
if (*s == '\r') ms->state = CR2;
- if (*s == '\n') ms->state = BODY;
+ else if (*s == '\n') ms->state = BODY;
+ else ms->state = START;
break;
case CR2:
if (*s == '\n') ms->state = BODY;
ovec, 30);
if (match > 0) {
pcre_get_substring(ms->header, ovec, match, 0, (const char **)&h);
- res = parse_cxfer(m, h, strlen(h));
+ res = parse_cxfer(ms, h, strlen(h));
free(h);
if (!res) return;
}
ovec, 30);
if (match > 0) {
pcre_get_substring(ms->header, ovec, match, 0, (const char **)&h);
- res = parse_ctype(m, h, strlen(h));
+ res = parse_ctype(ms, h, strlen(h));
free(h);
if (!res) return;
- if (res == 2) continue; /* XXX -- avoid a double
- start of multipart downstream,
- which would clobber the
- boundary value. Better fix
- would be to include a state
- in the core msgproc struct
- to prevent double starts. */
}
- next = msgproc_next(m);
- assert(next);
- msgproc_start(next);
+ if (ms->encoding) {
+ enc = msgproc_create(m, ms->encoding);
+ msgproc_start(enc);
+ } else {
+ enc = m;
+ }
+
+ type = msgproc_create(enc, ms->type);
+ if (ms->type->mpm_type == MSGPROC_MULTIPART) {
+ assert(ms->boundary);
+ msgproc_start(type);
+ msgproc_set(type, 1, ms->boundary, strlen(ms->boundary));
+ free(ms->boundary);
+ } else {
+ msgproc_start(type);
+ }
}
}
msgproc_module msgproc_message = {
MSGPROC_MESSAGE, /* type */
message_init, /* module init */
- NULL, /* set module parameter */
+ message_module_set, /* set module parameter */
message_start, /* start module instance */
NULL, /* set module instance parameter */
message_process, /* process data */
msgproc_module msgproc_part = {
MSGPROC_PART, /* type */
message_init,
- NULL, /* set module parameter */
+ message_module_set, /* set module parameter */
message_start, /* start module instance */
NULL, /* set module instance parameter */
message_process, /* process data */