]> git.mikk.net Git - liburl/commitdiff
Preliminary implementation of message/rfc822 and multipart part processor.
author: chris mikkelson <chris@mikk.net>
Wed, 10 Mar 2010 04:14:04 +0000 (22:14 -0600)
committer: chris mikkelson <chris@mikk.net>
Thu, 11 Mar 2010 04:41:09 +0000 (22:41 -0600)
Makefile
message.c [new file with mode: 0644]

index 5af3b51d13e18ad60f18e99405ae84678303c972..fa994518b1f0f571d6c772f98af9d75afe9f0584 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -3,8 +3,10 @@ CFLAGS=-g -Wall -Werror -I/usr/local/include
 .c.o:
        $(CC) $(CFLAGS) -c $>
 
-SRCS=msgproc.c base64.c quoted-printable.c re_stream.c html.c text.c multipart.c
-OBJS=msgproc.o base64.o quoted-printable.o re_stream.o html.o text.o multipart.o
+SRCS=msgproc.c base64.c quoted-printable.c re_stream.c html.c text.c \
+       multipart.c message.c
+OBJS=msgproc.o base64.o quoted-printable.o re_stream.o html.o text.o \
+       multipart.o message.o
 
 default: liburl.a
 
diff --git a/message.c b/message.c
new file mode 100644 (file)
index 0000000..be32fec
--- /dev/null
+++ b/message.c
@@ -0,0 +1,271 @@
+/* 
+ * Copyright (c) 2009  Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <pcre.h>
+#include "msgproc.h"
+
+/*
+ * Header-matching patterns and their compiled forms (built once by
+ * message_init()).
+ *
+ * ctype_pat / cxfer_pat lift one header out of the saved header block.
+ * They are compiled MULTILINE|DOTALL|CASELESS, so the lazy ".*?" runs
+ * across continuation lines and stops at the first line that begins with
+ * a non-space character (that character is part of the match) or at the
+ * end of the block.
+ *
+ * type_pat, enc_pat and bound_pat are then applied to the lifted header
+ * text only:
+ *   type_pat   group 1 = "plain" or "html", group 2 = "multipart"
+ *   enc_pat    no groups; the whole match is the encoding name
+ *   bound_pat  group 1 = boundary value: any run of boundary characters
+ *              (spaces allowed inside) ending in a non-space one; CR/LF
+ *              are excluded so an unquoted value cannot swallow the
+ *              start of the following header line
+ */
+static const char *ctype_pat = "^Content-Type:.*?(^\\S|\\Z)";
+static pcre *ctype_re;
+static const char *cxfer_pat = "^Content-Transfer-Encoding:.*?(^\\S|\\Z)";
+static pcre *cxfer_re;
+static const char *type_pat = "text/(plain|html)|(multipart)/";
+static pcre *type_re;
+static const char *enc_pat = "7bit|8bit|base64|quoted-printable";
+static pcre *enc_re;
+static const char *bound_pat =
+       "boundary\\s*=\\s*\"?([\\da-z'()+_,\\-./:=? ]*[\\da-z'()+_,\\-./:=?])\"?";
+static pcre *bound_re;
+
+/*
+ * Module init: compile the patterns above.  Work is done on the first
+ * call only; later calls return immediately.
+ *
+ * Compile errors are not reported.  A failed compile leaves the
+ * corresponding pointer NULL, which pcre_exec() answers with
+ * PCRE_ERROR_NULL, i.e. the pattern simply never matches.
+ */
+static void
+message_init(void)
+{
+       static int initialized = 0;
+       const char *etxt;
+       int epos;
+       int hdr_opts = PCRE_MULTILINE | PCRE_DOTALL | PCRE_CASELESS;
+       int val_opts = PCRE_CASELESS;
+
+       if (initialized) return;
+       initialized = 1;
+
+       /* whole-header extractors work on the multi-line header block */
+       ctype_re = pcre_compile(ctype_pat, hdr_opts, &etxt, &epos, 0);
+       cxfer_re = pcre_compile(cxfer_pat, hdr_opts, &etxt, &epos, 0);
+
+       /* value matchers work on a single, already extracted header */
+       type_re = pcre_compile(type_pat, val_opts, &etxt, &epos, 0);
+       enc_re = pcre_compile(enc_pat, val_opts, &etxt, &epos, 0);
+       bound_re = pcre_compile(bound_pat, val_opts, &etxt, &epos, 0);
+}
+
+/* Downstream modules instantiated according to the Content-Type. */
+static msgproc_module *text_module = &msgproc_text,
+               *html_module = &msgproc_html,
+               *multipart_module = &msgproc_multipart;
+
+#define HDRALLOC 512
+#define HDRINCR HDRALLOC
+
+/*
+ * Per-instance state.
+ *
+ * header    growable buffer collecting the raw header bytes (not
+ *           NUL-terminated; hdrlen is authoritative)
+ * hdrlen    bytes stored in header
+ * hdralloc  bytes allocated for header
+ * state     end-of-header detector driven by message_process():
+ *           START  inside a line
+ *           CR     saw CR
+ *           NL     just completed a line
+ *           CR2    saw CR directly after a completed line
+ *           BODY   blank line seen; everything after it is body data
+ */
+struct message_state {
+       char *header;
+       int hdrlen, hdralloc;
+       enum {
+               START = 0,
+               CR, NL,
+               CR2,
+               BODY
+       } state;
+};
+
+/*
+ * Start a module instance: allocate zeroed state (state == START) plus
+ * the initial header buffer and attach it to m.  On allocation failure
+ * the private pointer is set to NULL, which message_process() treats as
+ * "do nothing".
+ */
+static void
+message_start(msgproc *m)
+{
+       struct message_state *ms = calloc(1, sizeof(*ms));
+
+       if (ms) {
+               ms->header = malloc(HDRALLOC);
+               if (ms->header) {
+                       /* record the capacity so the first append does
+                          not reallocate a buffer we already own */
+                       ms->hdralloc = HDRALLOC;
+               } else {
+                       free(ms);
+                       ms = NULL;
+               }
+       }
+       msgproc_setpriv(m, (void *)ms);
+}
+
+/*
+ * Shut down a module instance: release the private state, then hand m
+ * to msgproc_free().
+ */
+static void
+message_finish(msgproc *m)
+{
+       struct message_state *ms = msgproc_getpriv(m);
+
+       if (ms) {
+               free(ms->header);       /* free(NULL) is a no-op */
+               free(ms);
+       }
+       msgproc_free(m);
+}
+
+/*
+ * Append one byte to the header buffer, growing it by HDRINCR when full.
+ *
+ * Returns the (possibly moved) buffer, or NULL when the buffer is
+ * unavailable.  If growing fails the old buffer is released and
+ * ms->header becomes NULL -- the behaviour of the BSD-only reallocf()
+ * this replaces -- so later calls return NULL without touching memory.
+ */
+static inline char *
+append_hdr(char c, struct message_state *ms)
+{
+       if (!ms->header) return NULL;
+
+       if (ms->hdrlen >= ms->hdralloc) {
+               char *grown = realloc(ms->header, ms->hdralloc + HDRINCR);
+
+               if (!grown) {
+                       free(ms->header);
+                       ms->header = NULL;
+                       ms->hdrlen = ms->hdralloc = 0;
+                       return NULL;
+               }
+               ms->header = grown;
+               ms->hdralloc += HDRINCR;
+       }
+       ms->header[ms->hdrlen++] = c;
+       return ms->header;
+}
+
+/*
+ * Act on a Content-Transfer-Encoding header.
+ *
+ * m     processor the decoder is created under
+ * h     text of the header (as matched by cxfer_pat)
+ * hlen  length of h in bytes
+ *
+ * Returns 1 when the encoding is one we can handle: a decoder is
+ * created for quoted-printable and base64, nothing is needed for the
+ * identity encodings 7bit and 8bit.  Returns 0 when no known encoding
+ * name is found or the name cannot be extracted.
+ */
+static int
+parse_cxfer(msgproc *m, char *h, size_t hlen)
+{
+       int ovec[30];
+       char *enc = NULL;
+       int match;
+
+       match = pcre_exec(enc_re, 0, h, (int)hlen, 0, 0, ovec, 30);
+       if (match <= 0) return 0;
+
+       /* enc_pat has no capture groups: the name is the whole match */
+       if (pcre_get_substring(h, ovec, match, 0, (const char **)&enc) < 0
+           || !enc)
+               return 0;
+
+       if (!strcasecmp(enc, "quoted-printable")) {
+               msgproc_create(m, &msgproc_quoted);
+       } else if (!strcasecmp(enc, "base64")) {
+               msgproc_create(m, &msgproc_base64);
+       }
+       /* else 7bit/8bit: data passes through undecoded */
+
+       pcre_free_substring(enc);
+       return 1;
+}
+
+/*
+ * Act on a Content-Type header.
+ *
+ * m     processor owning this message/part
+ * h     text of the header (as matched by ctype_pat); owned by the
+ *       caller and not freed here
+ * hlen  length of h in bytes
+ *
+ * Returns
+ *   1  text/plain or text/html: the matching module was created below
+ *      the transfer decoder if one exists, otherwise below m
+ *   2  multipart: the multipart module was created below m, already
+ *      started and given its boundary -- the caller must not start it
+ *      again
+ *   0  unsupported type, multipart without a usable boundary, or an
+ *      extraction failure
+ */
+static int
+parse_ctype(msgproc *m, char *h, size_t hlen)
+{
+       int ovec[30];
+       char *type = NULL;
+       msgproc *next, *parent;
+       int match, group, res = 1;
+
+       next = msgproc_next(m);
+       parent = next ? next : m;
+
+       match = pcre_exec(type_re, 0, h, (int)hlen, 0, 0, ovec, 30);
+       if (match <= 0) return 0;
+
+       /* pcre_exec() returns 2 when only group 1 (text subtype) took
+          part in the match and 3 when group 2 ("multipart") did */
+       group = (match > 2) ? 2 : 1;
+       if (pcre_get_substring(h, ovec, match, group, (const char **)&type) < 0
+           || !type)
+               return 0;
+
+       if (!strcasecmp(type, "plain")) {
+               msgproc_create(parent, text_module);
+       } else if (!strcasecmp(type, "html")) {
+               msgproc_create(parent, html_module);
+       } else if (!strcasecmp(type, "multipart")) {
+               char *b = NULL;
+
+               /* XXX -- unlike text/plain and text/html,
+                       multipart parts cannot be encoded.
+                       Remove any previously-established
+                       encoding */
+               if (next) msgproc_finish(next);
+
+               res = 0;        /* multipart is unusable without a boundary */
+               match = pcre_exec(bound_re, 0, h, (int)hlen, 0, 0, ovec, 30);
+               if (match > 1
+                   && pcre_get_substring(h, ovec, match, 1,
+                                       (const char **)&b) >= 0
+                   && b) {
+                       next = msgproc_create(m, multipart_module);
+                       msgproc_start(next);
+                       msgproc_set(next, 1, b, strlen(b));
+                       /* NOTE(review): releasing b here assumes
+                          msgproc_set() copies the value (it is handed
+                          an explicit length) -- confirm against the
+                          multipart module */
+                       pcre_free_substring(b);
+                       res = 2;
+               }
+       } else {
+               res = 0;
+       }
+       pcre_free_substring(type);
+       return res;
+}
+
+/*
+ * Process data: feed a chunk of message (or part) data through the
+ * instance.
+ *
+ * m    processor instance; its private state comes from message_start()
+ * buf  input bytes
+ * len  number of bytes in buf
+ *
+ * While still in the header, bytes are saved with append_hdr() and run
+ * through a small state machine that looks for the blank line (LF LF,
+ * CRLF CRLF or a mix) ending the header.  At that point the saved
+ * header is searched for Content-Transfer-Encoding and Content-Type,
+ * parse_cxfer()/parse_ctype() build the downstream chain, and the chain
+ * is started.  Once in BODY state every remaining byte -- of this call
+ * and of all later calls -- is passed to the next processor.
+ *
+ * The function returns without forwarding anything when the instance
+ * has no state, the header buffer cannot grow, a header value is
+ * unsupported, or no downstream processor exists.
+ */
+static void
+message_process(msgproc *m, char *buf, size_t len)
+{
+       struct message_state *ms = msgproc_getpriv(m);
+       char *s, *h;
+       int res, match, ovec[30];
+       msgproc *next = 0;
+
+       if (!ms) return;
+       if (!ms->header) return;
+
+       for (s = buf; len > 0; s++, len--) {
+               if (ms->state == BODY) {
+                       next = msgproc_next(m);
+                       if (next)
+                               msgproc_process(next, s, len);
+                       return;
+               }
+               if (!append_hdr(*s, ms)) return;        /* out of memory */
+               switch(ms->state) {
+               case START:
+                       if (*s == '\r') ms->state = CR;
+                       if (*s == '\n') ms->state = NL;
+                       break;
+               case CR:
+                       if (*s == '\n') ms->state = NL;
+                       else ms->state = START;
+                       break;
+               case NL:
+                       if (*s == '\r') ms->state = CR2;
+                       else if (*s == '\n') ms->state = BODY;
+                       /* any other byte begins a new header line;
+                          staying in NL would end the header at the
+                          next newline instead of at the blank line */
+                       else ms->state = START;
+                       break;
+               case CR2:
+                       if (*s == '\n') ms->state = BODY;
+                       else ms->state = START;
+                       break;
+               case BODY:
+                       break;
+               }
+               if (ms->state != BODY) continue;
+
+               /* parse saved headers, look for non-default Content-Type
+                  and Content-Transfer-Encoding values. */
+
+               match = pcre_exec(cxfer_re, 0, ms->header, ms->hdrlen, 0, 0,
+                                       ovec, 30);
+               if (match > 0) {
+                       h = 0;
+                       if (pcre_get_substring(ms->header, ovec, match, 0,
+                                       (const char **)&h) < 0 || !h)
+                               return;
+                       res = parse_cxfer(m, h, strlen(h));
+                       pcre_free_substring(h);
+                       if (!res) return;
+               }
+
+               match = pcre_exec(ctype_re, 0, ms->header, ms->hdrlen, 0, 0,
+                                       ovec, 30);
+               if (match > 0) {
+                       h = 0;
+                       if (pcre_get_substring(ms->header, ovec, match, 0,
+                                       (const char **)&h) < 0 || !h)
+                               return;
+                       res = parse_ctype(m, h, strlen(h));
+                       pcre_free_substring(h);
+                       if (!res) return;
+                       if (res == 2) continue;  /* XXX -- avoid a double
+                                               start of multipart downstream,
+                                               which would clobber the
+                                               boundary value.  Better fix
+                                               would be to include a state
+                                               in the core msgproc struct
+                                               to prevent double starts. */
+               }
+
+               /* Nothing downstream (e.g. no Content-Type header was
+                  found): drop the body instead of aborting the process
+                  on input we do not control. */
+               next = msgproc_next(m);
+               if (!next) return;
+               msgproc_start(next);
+       }
+}
+
+/*
+ * Module descriptor registered under type MSGPROC_MESSAGE.  The slots
+ * are filled positionally; unused hooks are NULL.  It uses the same
+ * init/start/process/finish callbacks as msgproc_part and differs from
+ * it only in the type tag.
+ */
+msgproc_module msgproc_message = {
+       MSGPROC_MESSAGE,                /* type */
+       message_init,                   /* module init */
+       NULL,                           /* set module parameter */
+       message_start,                  /* start module instance */
+       NULL,                           /* set module instance parameter */
+       message_process,                /* process data */
+       message_finish,                 /* shut down, free module instance */
+       NULL                            /* shut down, free module */
+};
+
+/*
+ * Module descriptor registered under type MSGPROC_PART.  It uses the
+ * same init/start/process/finish callbacks as msgproc_message and
+ * differs from it only in the type tag.
+ */
+msgproc_module msgproc_part = {
+       MSGPROC_PART,                   /* type */
+       message_init,                   /* module init */
+       NULL,                           /* set module parameter */
+       message_start,                  /* start module instance */
+       NULL,                           /* set module instance parameter */
+       message_process,                /* process data */
+       message_finish,                 /* shut down, free module instance */
+       NULL                            /* shut down, free module */
+};