From 1f320a8c29fe8aa0236d868a659add037c0bb236 Mon Sep 17 00:00:00 2001 From: chris mikkelson Date: Mon, 26 Jan 2009 22:49:54 -0600 Subject: [PATCH] Implemented new module (msgproc_module) and module instance (msgproc) interface. This allows each processing module to consume/advertise only one global symbol, as opposed to 4-5 previously. The HTML, text, and base64 modules have been moved from the old interface to the new. Base64 has furthermore become self-contained. When the quoted-printable code is module-ified, the decoders.h file can go away. parser.h and parser.c were removed as they were specific to the old interface, used to collect the growing number of public symbols for each module. --- base64.c | 64 ++++++++++++++++++++++++--- decoders.h | 12 ----- html.c | 57 ++++++++++++------------ msgproc.c | 126 ++++++++++++++++++++++++++++++++--------------------- msgproc.h | 60 +++++++++++++++---------- parser.c | 66 ---------------------------- parser.h | 55 ----------------------- text.c | 58 ++++++++++++------------ 8 files changed, 233 insertions(+), 265 deletions(-) delete mode 100644 parser.c delete mode 100644 parser.h diff --git a/base64.c b/base64.c index 752ae19..96a85bb 100644 --- a/base64.c +++ b/base64.c @@ -6,13 +6,13 @@ /* Adapted from phk@freebsd.dk's public domain implementation. */ #include -#include "decoders.h" +#include "msgproc.h" static const char *b64c = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static char i64[256]; -void +static void base64_init(void) { int i; @@ -24,20 +24,25 @@ base64_init(void) i64['='] = 0; } -struct b64_state * +struct b64_state { + unsigned u; + int v; +}; + +static struct b64_state * b64_start (struct b64_state *b64s) { bzero(b64s, sizeof(*b64s)); return b64s; } -void +static void b64_stop (struct b64_state *b64s) { bzero(b64s, sizeof(*b64s)); } -int +static int b64_decode(struct b64_state *b64s, char *s, int len, char *out, int size, int *outlen) { @@ -101,3 +106,52 @@ int main(void) { return 0; } #endif + +static void +base64_start(msgproc *m) +{ + struct b64_state *b64s = malloc(sizeof(b64s)); + base64_start(b64s); + msgproc_setpriv(m, (void *)b64s); +} + +static void +base64_process(msgproc *m, char *buf, int len) +{ + char tmp[2048]; + int tmplen, ret; + struct b64_state *b64s = (struct b64_state *)msgproc_getpriv(m); + msgproc *next = msgproc_next(m); + + while (len > 0) { + ret = b64_decode(b64s, buf, len, tmp, sizeof(tmp), &tmplen); + if (ret < 1) { + /* TODO: handle errors in *b64s; skip all + text after error, or recover? */ + } + msgproc_process(next, tmp, tmplen); + buf += ret; len -= ret; + } +} + +static void +base64_finish(struct msgproc_stage *m) +{ + struct b64_state *b64s = msgproc_getpriv(m); + if (b64s) { + b64_stop(b64s); + free(b64s); + } + msgproc_free(m); +} + +msgproc_module msgproc_base64 = { + MSGPROC_BASE64, /* type */ + base64_init, /* module init */ + NULL, /* set module parameter */ + base64_start, /* start module instance */ + NULL, /* set module instance parameter */ + base64_process, /* process data */ + base64_finish, /* shut down, free module instance */ + NULL /* shut down, free module */ +}; diff --git a/decoders.h b/decoders.h index 2835652..16b6cf3 100644 --- a/decoders.h +++ b/decoders.h @@ -3,18 +3,6 @@ * All Rights Reserved, for now. */ -/* Base64 decoder */ -void base64_init(void); - -struct b64_state { - unsigned u; - int v; -}; - -struct b64_state *b64_start(struct b64_state *); -void b64_end(struct b64_state *); -int b64_decode(struct b64_state *, char *, int, char *, int, int *); - /* Quoted-printable decoder */ struct qp_state { diff --git a/html.c b/html.c index 58e2512..ca4a911 100644 --- a/html.c +++ b/html.c @@ -7,13 +7,12 @@ #include "re_stream.h" #include "msgproc.h" -#include "parser.h" static const char *html_url_pattern = "<(a href|img src)=\"https?://[^\"]+"; static pcre *html_url_re; -void -init_html_parser(void) { +static void +html_init(void) { const char *etxt; int epos; html_url_re = pcre_compile(html_url_pattern, 0, &etxt, &epos, 0); if (!html_url_re) { @@ -21,44 +20,48 @@ init_html_parser(void) { } } -struct msgproc_stage * -start_html_parser(struct msgproc_stage *parent) +static void +html_start(msgproc *m) { - struct msgproc_stage *m; - m = init_mps(parent, process_text, finish_text_parser); - if (m) { - m->type = PARSE_HTML; - m->state = malloc(sizeof(struct stream_re)); - if (!m->state) { - msgproc_free(m); - return NULL; - } - re_stream_start((struct stream_re *)m->state, html_url_re, 0); - } - return m; + struct stream_re *s = malloc(sizeof(struct stream_re)); + re_stream_start(s, html_url_re, 0); + msgproc_setpriv(m, (void *)s); } -void -process_text(struct msgproc_stage *m, char *buf, int size) +static void +html_process(msgproc *m, char *buf, size_t size) { - struct stream_re *sr = (struct stream_re *)m->state; + struct stream_re *sr = (struct stream_re *)msgproc_getpriv(m); int n; while (size > 0) { n = re_stream_exec(sr, buf, size); - if (re_stream_result(sr) == 1 && m->base->callback) { - /* TODO: send more information back to callback... */ - m->base->callback("html", re_stream_getresult(sr)); + if (re_stream_result(sr) == 1) { + /* TODO: callback (module param) or next stage */ } size -= n; buf += n; } } -void -finish_html_parser(struct msgproc_stage *m) +static void +html_finish(msgproc *m) { - re_stream_stop((struct stream_re *)m->state); - free(m->state); + struct stream_re *sr = (struct stream_re *)msgproc_getpriv(m); + if (sr) { + re_stream_stop(sr); + free(sr); + } msgproc_free(m); } + +msgproc_module msgproc_html = { + MSGPROC_HTML, /* type */ + html_init, /* module init */ + NULL, /* set module parameter */ + html_start, /* start module instance */ + NULL, /* set module instance parameter */ + html_process, /* process data */ + html_finish, /* shut down, free module instance */ + NULL /* shut down, free module */ +}; diff --git a/msgproc.c b/msgproc.c index 1c84cce..8457a85 100644 --- a/msgproc.c +++ b/msgproc.c @@ -5,70 +5,98 @@ #include "msgproc.h" -struct msgproc_stage * -mps_alloc(struct msgproc_stage *parent) +msgproc * +msgproc_create(msgproc *parent, msgproc_module *mod) { - struct msgproc_stage *child = malloc(sizeof(struct msgproc_stage)); - if (parent) parent->next = child; - if (child) bzero(child, sizeof(*child)); - return child; + msgproc *m = malloc(sizeof(msgproc)); + if (m) { + m->mp_priv = NULL; + m->mp_mod = mod; + m->mp_prev = parent; + m->mp_next = NULL; + if (parent) parent->next = m; + } + return m; } -/* - void finish_type(struct msgproc_stage *m) - { - ...clean up - free(m->state); - mps_free(m); +void +msgproc_free(msgproc *m) +{ + if (m) { + if (m->mp_prev) m->mp_prev->next = NULL; + free(m); } -*/ +} void -mps_free(struct msgproc_stage *m) +msgproc_setpriv(msgproc *m, void *data) { - mps_finish_next(m); - if (m->prev) m->prev->next = 0; - free(m); + if (m) m->mp_priv = data; } -/* - void process_decoder_type(struct msgproc_stage *m, char *buf, int len) - { - char tmp[BUFSIZ]; int tmplen; - ... process buf/len into tmp/tmplen - mps_process_next(m,tmp,tmplen); - } +void * +msgproc_getpriv(msgproc *m) +{ + if (m) return m->mp_priv; + return NULL; +} - void process_multipart(struct msgproc_tage *m, char *buf, int len) { - char tmp[BUFSIZ]; int tmplen; - /* note; this will automatically skip over plain - text prior to boundary */ - do { - while (!boundary && remaining input) { - copy stuff into tmp - mps_process_next(m, tmp, tmplen); - } - if (boundary) - mps_finish_next(m); - tmplen = 0; - start_multipart_chunk(m); - } - } while input remains - } -*/ +msgproc * +msgproc_next(msgproc *m) +{ + if (m) return m->mp_next; + return NULL; +} + +msgproc * +msgproc_prev(msgproc *m) +{ + if (m) return m->mp_prev; + return NULL; +} void -mps_process_next(struct msgproc_stage *m, char *buf, int len) +msgproc_module_init(msgproc_module *m) { - if (m->next && m->next->process) { - m->next->process(m,buf,len); - } + if (m) m->mpm_init(); } void -mps_finish_next(struct msgproc_stage *m, char *buf, int len) +msgproc_module_set(msgproc_module *m, void *data, size_t dsiz) { - if (m->next && m->next->finish) { - m->next->finish(m); - } + if (m) m->mpm_set(data, dsiz); +} + +void +msgproc_start(msgproc *m) +{ + if (m && m->mp_mod && m->mp_mod->mp_start) + m->mp_mod->mp_start(m); +} + +void +msgproc_set(msgproc *m, int type, void *data, size_t size) +{ + if (m && m->mp_mod && m->mp_mod->mp_set) + m->mp_mod->mp_set(m, type, data, size); +} + +void +msgproc_process(msgproc *m, char *data, size_t size) +{ + if (m && m->mp_mod && m->mp_mod->mp_process) + m->mp_mod->mp_process(m, data, size); +} + +void +msgproc_finish(msgproc *m) +{ + if (m && m->mp_mod && m->mp_mod->mp_finish) + m->mp_mod->mp_finish(m); +} + +void +msgproc_module_shutdown(msgproc_module *m) +{ + if (m) m->mpm_shutdown(); } diff --git a/msgproc.h b/msgproc.h index 40a10a1..9a76f72 100644 --- a/msgproc.h +++ b/msgproc.h @@ -3,31 +3,45 @@ * All Rights Reserved, for now. */ -struct msgproc_stage; +type struct _msgproc_s msgproc; -struct msgproc_base { - void (*callback)(char *, char *); /* URL type, URL text */ - void *call_data; - struct msgproc_stage *msgproc; -}; +typedef struct { + int mpm_type; + void (*mpm_init)(void); + void (*mpm_set)(int, void *, size_t); + void (*mp_start)(msgproc *); + void (*mp_set)(msgproc *, int, void *, size_t); + void (*mp_process)(msgproc *, char *, size_t); + void (*mp_finish)(msgproc *); + void (*mpm_shutdown)(void); +} msgproc_module; -struct msgproc_stage { - int type; - void *state; - void (*process)(struct msgproc_stage *, char *, int); - void (*finish)(struct msgproc_stage *); /* finish frees its argument */ - struct msgproc_base *base; - struct msgproc_stage *prev, *next; +struct _msgproc_s { + void *mp_priv; + msgproc_module mp_mod; + msgproc *mp_prev, *mp_next; }; -/* main interface to the user. could wrap _start further... */ -struct msgproc_base *msgproc_init(void); -void msgproc_start(struct msgproc_base *, struct msgproc_stage *); -void msgproc_process(struct msgproc_base *, char *, int); -void msgproc_finish(struct msgproc_base *); -/* */ +msgproc *msgproc_create(msgproc *, msgproc_module *); +void msgproc_free(msgproc *); + +void msgproc_setpriv(msgproc *, void *); +void *msgproc_getpriv(msgproc *); +msgproc *msgproc_next(msgproc *); +msgproc *msgproc_prev(msgproc *); + +void msgproc_module_init(mgproc_module *); +void msgproc_module_set(msgproc_module *, int, void *, size_t); +void msgproc_start(msgproc *); +void msgproc_set(msgproc*, int, void *, size_t); +void msgproc_process(msgproc *, char *, size_t); +void msgproc_finish(msgproc *); +void msgproc_module_shutdown(mgproc_module *); -struct msgproc_stage *mps_alloc(struct msgproc_stage *); -void mps_free(struct msgproc_stage *); -void mps_process_next(struct msgproc_stage *, char *, int); -void mps_finish_next(struct msgproc_stage *); +extern msgproc_module msgproc_text, + msgproc_html, + msgproc_base64, + msgproc_quoted, + msgproc_multipart, + msgproc_part, + msgproc_message; diff --git a/parser.c b/parser.c deleted file mode 100644 index df6f451..0000000 --- a/parser.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2009 Christopher L. Mikkelson - * All Rights Reserved, for now. - */ - -#include "msgproc.h" -#include "parser.h" - - - -struct msgproc_stage * -start_base64_decoder(struct msgproc_stage *parent) -{ - return init_mps(parent, process_base64, finish_base64_decoder); -} - -struct msgproc_stage * -start_quoted_decoder(struct msgproc_stage *parent) -{ - return init_mps(parent, process_quoted, finish_quoted_decoder); -} - -struct msgproc_stage * -start_multipart_parser(struct msgproc_stage *parent) -{ - return init_mps(parent, process_multipart, finish_multipart_parser); -} - -struct msgproc_stage * -start_rfc822_parser(struct msgproc_stage *parent) -{ - struct msgproc_stage *m; - m = init_mps(parent, process_chunk, finish_chunk_parser); - /* TBD: use the integer types */ - set_contenttype(m, "text/plain"); - set_encoding(m, "7bit"); - return m; -} - -struct msgproc_stage * -start_chunk_parser(struct msgproc_stage *parent) -{ - return init_mps(parent, process_chunk, finish_chunk_parser); -} - -/* these should probably be static inlines in .c file */ -void set_boundary(struct msgproc_stage *, char *); -void set_contenttype(struct msgproc_stage *, char *); -void set_encoding(struct msgproc_stage *, char *); -void set_disposition(struct msgproc_stage *, char *); - -void process_base64(struct msgproc_stage *, char *, int); -void process_quoted(struct msgproc_stage *, char *, int); -void process_multipart(struct msgproc_stage *, char *, int); -/* used for rfc822 complete messages and MIME parts; former - has defaults for content-type (text/plain) and encoding (7bit) */ -void process_message_chunk(struct msgproc_stage *, char *, int); - -void -finish_base64_decoder(struct msgproc_stage*); -void -finish_quoted_decoder(struct msgproc_stage*); -void -finish_multipart_parser(struct msgproc_stage*); -void -finish_rfc822_parser(struct msgproc_stage*); diff --git a/parser.h b/parser.h deleted file mode 100644 index c1dca60..0000000 --- a/parser.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2009 Christopher L. Mikkelson - * All Rights Reserved, for now. - */ - -#define PARSE_TEXT 0 -#define PARSE_HTML 1 -#define DECODE_BASE64 2 -#define DECODE_QUOTED 3 -#define PARSE_MULTIPART 4 -#define PARSE_RFC822 5 - -struct msgproc_stage *start_text_parser(struct msgproc_stage *); -struct msgproc_stage *start_html_parser(struct msgproc_stage*); -struct msgproc_stage *start_base64_decoder(struct msgproc_stage*); -struct msgproc_stage *start_quoted_decoder(struct msgproc_stage*); -struct msgproc_stage *start_multipart_parser(struct msgproc_stage*); -struct msgproc_stage *start_rfc822_parser(struct msgproc_stage*); - -/* these should probably be static inlines in .c file */ -void set_boundary(struct msgproc_stage *, char *); -void set_contenttype(struct msgproc_stage *, char *); -void set_encoding(struct msgproc_stage *, char *); -void set_disposition(struct msgproc_stage *, char *); - -void process_text(struct msgproc_stage *, char *, int); -void process_html(struct msgproc_stage *, char *, int); -void process_base64(struct msgproc_stage *, char *, int); -void process_quoted(struct msgproc_stage *, char *, int); -void process_multipart(struct msgproc_stage *, char *, int); -/* used for rfc822 complete messages and MIME parts; former - has defaults for content-type (text/plain) and encoding (7bit) */ -void process_chunk(struct msgproc_stage *, char *, int); - -void finish_text_parser(struct msgproc_stage *); -void finish_html_parser(struct msgproc_stage *); -void finish_base64_decoder(struct msgproc_stage *); -void finish_quoted_decoder(struct msgproc_stage *); -void finish_multipart_parser(struct msgproc_stage *); -void finish_chunk_parser(struct msgproc_stage *); - - -static inline struct msgproc_stage * -init_mps(struct msgproc_stage *parent, - void (*process)(struct msgproc_stage *, char *, int), - void (*finish)(struct msgproc_stage *)) -{ - struct msgproc_stage *child = mps_alloc(parent); - if (child) { - if (parent) child->base = parent->base; - child->process = process; - child->finish = finish; - } - return child; -} diff --git a/text.c b/text.c index 22eb12d..a61bd26 100644 --- a/text.c +++ b/text.c @@ -7,13 +7,12 @@ #include "re_stream.h" #include "msgproc.h" -#include "parser.h" static const char *text_url_pattern = "https?://\\S+"; static pcre *text_url_re; -void -init_text_parser(void) { +static void +text_init(void) { const char *etxt; int epos; text_url_re = pcre_compile(text_url_pattern, 0, &etxt, &epos, 0); if (!text_url_re) { @@ -21,46 +20,49 @@ init_text_parser(void) { } } -struct msgproc_stage * -start_text_parser(struct msgproc_stage *parent) +static void +text_start(msgproc *m) { - struct msgproc_stage *m; - m = init_mps(parent, process_text, finish_text_parser); - if (m) { - m->type = PARSE_TEXT; - m->state = malloc(sizeof(struct stream_re)); - if (!m->state) { - msgproc_free(m); - return NULL; - } - re_stream_start((struct stream_re *)m->state, text_url_re, 0); - } - return m; + struct stream_re *s = malloc(sizeof(struct stream_re)); + re_stream_start(s, text_url_re, 0); + msgproc_setpriv(m, (void *)s); } -void -process_text(struct msgproc_stage *m, char *buf, int size) +static void +text_process(msgproc *m, char *buf, size_t size) { - struct stream_re *sr = (struct stream_re *)m->state; + struct stream_re *sr = (struct stream_re *)msgproc_getpriv(m); int n; while (size > 0) { n = re_stream_exec(sr, buf, size); if (re_stream_result(sr) == 1) { - /* TODO: send more information back to callback... */ - if (m->base->callback) - m->base->callback("text", - re_stream_getresult(sr)); + /* TODO: callback (supplied by set module param?) + or next stage */ } size -= n; buf += n; } } -void -finish_text_parser(struct msgproc_stage *m) +static void +text_finish(msgproc *m) { - re_stream_stop((struct stream_re *)m->state); - free(m->state); + void *priv = msgproc_getpriv(m); + if (priv) { + re_stream_stop((struct stream_re *)priv); + free(priv); + } msgproc_free(m); } + +msgproc_module msgproc_text = { + MSGPROC_TEXT, /* type */ + text_init, /* module init */ + NULL, /* set module parameter */ + text_start, /* start module instance */ + NULL, /* set module instance parameter */ + text_process, /* process data */ + text_finish, /* shut down, free module instance */ + NULL /* shut down, free module */ +}; -- 2.50.1