--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include <pcre.h>
+#include "re_stream.h"
+
+#include "msgproc.h"
+#include "parser.h"
+
+static const char *html_url_pattern = "<(a href|img src)=\"https?://[^\"]+";
+static pcre *html_url_re;
+
+/*
+ * Compile html_url_pattern once and keep the result in html_url_re.
+ *
+ * NOTE(review): a compile failure is detected but not acted upon yet;
+ * html_url_re is simply left NULL in that case.
+ */
+void
+init_html_parser(void)
+{
+	const char *etxt;
+	int epos;
+
+	html_url_re = pcre_compile(html_url_pattern, 0, &etxt, &epos, NULL);
+	if (html_url_re == NULL) {
+		/* die -- TODO: not implemented; etxt/epos carry the diagnostic */
+	}
+}
+
+/*
+ * Create a child stage of `parent` that scans HTML for URLs.
+ *
+ * The stage is wired to process_html() and finish_html_parser(), tagged
+ * PARSE_HTML, and given a heap-allocated struct stream_re (kept in
+ * m->state) that is started on html_url_re.
+ *
+ * Returns the new stage.  Returns NULL when init_mps() yields no stage,
+ * or when the stream_re allocation fails (the stage is then released
+ * with msgproc_free()).
+ *
+ * NOTE(review): presumably init_html_parser() must have run first so
+ * that html_url_re is compiled -- confirm against the caller.
+ */
+struct msgproc_stage *
+start_html_parser(struct msgproc_stage *parent)
+{
+	struct msgproc_stage *m;
+
+	/*
+	 * Register the HTML handlers, not the text ones: process_text() is
+	 * owned by the text parser, and finish_html_parser() is this
+	 * stage's own teardown routine.
+	 */
+	m = init_mps(parent, process_html, finish_html_parser);
+	if (!m)
+		return NULL;
+
+	m->type = PARSE_HTML;
+	m->state = malloc(sizeof(struct stream_re));
+	if (!m->state) {
+		msgproc_free(m);
+		return NULL;
+	}
+	re_stream_start((struct stream_re *)m->state, html_url_re, 0);
+	return m;
+}
+
+/*
+ * Feed `size` bytes at `buf` through the stage's streaming regex.
+ *
+ * The buffer is handed to re_stream_exec() repeatedly; after each call
+ * the buffer is advanced by the value it returned (presumably the number
+ * of bytes consumed -- confirm in re_stream.h).  Whenever
+ * re_stream_result() reports 1 and a callback is installed on the base,
+ * the callback is invoked with the tag "html" and the current result.
+ *
+ * Named process_html to match the prototype in parser.h and to avoid
+ * clashing with the text parser's process_text().
+ */
+void
+process_html(struct msgproc_stage *m, char *buf, int size)
+{
+	struct stream_re *sr = (struct stream_re *)m->state;
+	int n;
+
+	while (size > 0) {
+		n = re_stream_exec(sr, buf, size);
+		if (re_stream_result(sr) == 1 && m->base->callback) {
+			/* TODO: send more information back to callback... */
+			m->base->callback("html", re_stream_getresult(sr));
+		}
+		size -= n;
+		buf += n;
+	}
+}
+
+/*
+ * Tear down an HTML parser stage: stop the streaming regex stored in
+ * m->state, free that state, then release the stage itself.
+ */
+void
+finish_html_parser(struct msgproc_stage *m)
+{
+	struct stream_re *sr = (struct stream_re *)m->state;
+
+	re_stream_stop(sr);
+	free(sr);
+	msgproc_free(m);
+}
#include "msgproc.h"
#include "parser.h"
-static inline struct msgproc_stage *
-init_mps(struct msgproc_stage *parent,
- void (*process)(struct msgproc_stage *, char *, int),
- void (*finish)(struct msgproc_stage *))
-{
- struct msgproc_stage *child = mps_alloc(parent);
- if (child) {
- if (parent) child->base = parent->base;
- child->process = process;
- child->finish = finish;
- }
- return child;
-}
-struct msgproc_stage *
-start_text_parser(struct msgproc_stage *parent)
-{
- return init_mps(parent, process_text, finish_text);
-}
-
-struct msgproc_stage *
-start_html_parser(struct msgproc_stage *parent)
-{
- return init_mps(parent, process_html, finish_html);
-}
struct msgproc_stage *
start_base64_decoder(struct msgproc_stage *parent)
{
- return init_mps(parent, process_base64, finish_base64);
+ return init_mps(parent, process_base64, finish_base64_decoder);
}
struct msgproc_stage *
start_quoted_decoder(struct msgproc_stage *parent)
{
- return init_mps(parent, process_quoted, finish_quoted);
+ return init_mps(parent, process_quoted, finish_quoted_decoder);
}
struct msgproc_stage *
start_multipart_parser(struct msgproc_stage *parent)
{
- return init_mps(parent, process_multipart, finish_multipart);
+ return init_mps(parent, process_multipart, finish_multipart_parser);
}
struct msgproc_stage *
start_rfc822_parser(struct msgproc_stage *parent)
{
- struct msgproc_stage *m = init_mps(parent, process_chunk, finish_chunk);
+ struct msgproc_stage *m;
+ m = init_mps(parent, process_chunk, finish_chunk_parser);
/* TBD: use the integer types */
set_contenttype(m, "text/plain");
set_encoding(m, "7bit");
struct msgproc_stage *
start_chunk_parser(struct msgproc_stage *parent)
{
- return init_mps(parent, process_chunk, finish_chunk);
+ return init_mps(parent, process_chunk, finish_chunk_parser);
}
/* these should probably be static inlines in .c file */
-void set_boundary(struct msgproc_stage *);
-void set_contenttype(struct msgproc_stage *);
-void set_encoding(struct msgproc_stage *);
-void set_disposition(struct msgproc_stage *);
+void set_boundary(struct msgproc_stage *, char *);
+void set_contenttype(struct msgproc_stage *, char *);
+void set_encoding(struct msgproc_stage *, char *);
+void set_disposition(struct msgproc_stage *, char *);
-void process_text(struct msgproc_stage *, char *, int);
-void process_html(struct msgproc_stage *, char *, int);
void process_base64(struct msgproc_stage *, char *, int);
void process_quoted(struct msgproc_stage *, char *, int);
void process_multipart(struct msgproc_stage *, char *, int);
has defaults for content-type (text/plain) and encoding (7bit) */
void process_message_chunk(struct msgproc_stage *, char *, int);
-struct msgproc_stage *finish_text_parser(struct msgproc_stage*);
-struct msgproc_stage *finish_html_parser(struct msgproc_stage*);
-struct msgproc_stage *finish_base64_decoder(struct msgproc_stage*);
-struct msgproc_stage *finish_quoted_decoder(struct msgproc_stage*);
-struct msgproc_stage *finish_multipart_parser(struct msgproc_stage*);
-struct msgproc_stage *finish_rfc822_parser(struct msgproc_stage*);
+void
+finish_base64_decoder(struct msgproc_stage*);
+void
+finish_quoted_decoder(struct msgproc_stage*);
+void
+finish_multipart_parser(struct msgproc_stage*);
+void
+finish_rfc822_parser(struct msgproc_stage*);
struct msgproc_stage *start_rfc822_parser(struct msgproc_stage*);
/* these should probably be static inlines in .c file */
-void set_boundary(struct msgproc_stage *);
-void set_contenttype(struct msgproc_stage *);
-void set_encoding(struct msgproc_stage *);
-void set_disposition(struct msgproc_stage *);
+void set_boundary(struct msgproc_stage *, char *);
+void set_contenttype(struct msgproc_stage *, char *);
+void set_encoding(struct msgproc_stage *, char *);
+void set_disposition(struct msgproc_stage *, char *);
void process_text(struct msgproc_stage *, char *, int);
void process_html(struct msgproc_stage *, char *, int);
void process_multipart(struct msgproc_stage *, char *, int);
/* used for rfc822 complete messages and MIME parts; former
has defaults for content-type (text/plain) and encoding (7bit) */
-void process_message_chunk(struct msgproc_stage *, char *, int);
+void process_chunk(struct msgproc_stage *, char *, int);
-struct msgproc_stage *finish_text_parser(struct msgproc_stage*);
-struct msgproc_stage *finish_html_parser(struct msgproc_stage*);
-struct msgproc_stage *finish_base64_decoder(struct msgproc_stage*);
-struct msgproc_stage *finish_quoted_decoder(struct msgproc_stage*);
-struct msgproc_stage *finish_multipart_parser(struct msgproc_stage*);
-struct msgproc_stage *finish_rfc822_parser(struct msgproc_stage*);
+void finish_text_parser(struct msgproc_stage *);
+void finish_html_parser(struct msgproc_stage *);
+void finish_base64_decoder(struct msgproc_stage *);
+void finish_quoted_decoder(struct msgproc_stage *);
+void finish_multipart_parser(struct msgproc_stage *);
+void finish_chunk_parser(struct msgproc_stage *);
+
+
+/*
+ * Allocate a stage via mps_alloc(parent) and install its handlers.
+ *
+ * When a stage is obtained, `process` and `finish` are stored in it and,
+ * if `parent` is non-null, the parent's `base` pointer is copied over.
+ * The value returned is whatever mps_alloc() produced, so callers must
+ * be prepared for a null result.
+ */
+static inline struct msgproc_stage *
+init_mps(struct msgproc_stage *parent,
+	 void (*process)(struct msgproc_stage *, char *, int),
+	 void (*finish)(struct msgproc_stage *))
+{
+	struct msgproc_stage *stage = mps_alloc(parent);
+
+	/* Nothing to set up when mps_alloc() gave us no stage. */
+	if (!stage)
+		return stage;
+
+	stage->process = process;
+	stage->finish = finish;
+	if (parent)
+		stage->base = parent->base;
+
+	return stage;
+}
--- /dev/null
+/*
+ * Copyright (c) 2009 Christopher L. Mikkelson <chris@mikk.net>
+ * All Rights Reserved, for now.
+ */
+
+#include <pcre.h>
+#include "re_stream.h"
+
+#include "msgproc.h"
+#include "parser.h"
+
+static const char *text_url_pattern = "https?://\\S+";
+static pcre *text_url_re;
+
+/*
+ * Compile text_url_pattern once and keep the result in text_url_re.
+ *
+ * NOTE(review): a compile failure is detected but not acted upon yet;
+ * text_url_re is simply left NULL in that case.
+ */
+void
+init_text_parser(void)
+{
+	const char *etxt;
+	int epos;
+
+	text_url_re = pcre_compile(text_url_pattern, 0, &etxt, &epos, NULL);
+	if (text_url_re == NULL) {
+		/* die -- TODO: not implemented; etxt/epos carry the diagnostic */
+	}
+}
+
+/*
+ * Create a child stage of `parent` that scans plain text for URLs.
+ *
+ * The stage is wired to process_text() and finish_text_parser(), tagged
+ * PARSE_TEXT, and given a heap-allocated struct stream_re (kept in
+ * m->state) that is started on text_url_re.
+ *
+ * Returns the new stage.  Returns NULL when init_mps() yields no stage,
+ * or when the stream_re allocation fails (the stage is then released
+ * with msgproc_free()).
+ */
+struct msgproc_stage *
+start_text_parser(struct msgproc_stage *parent)
+{
+	struct msgproc_stage *stage;
+
+	stage = init_mps(parent, process_text, finish_text_parser);
+	if (!stage)
+		return stage;
+
+	stage->type = PARSE_TEXT;
+	stage->state = malloc(sizeof(struct stream_re));
+	if (!stage->state) {
+		/* Without regex state the stage is useless; give it back. */
+		msgproc_free(stage);
+		return NULL;
+	}
+
+	re_stream_start((struct stream_re *)stage->state, text_url_re, 0);
+	return stage;
+}
+
+/*
+ * Feed `size` bytes at `buf` through the stage's streaming regex.
+ *
+ * The buffer is handed to re_stream_exec() repeatedly; after each call
+ * the buffer is advanced by the value it returned (presumably the number
+ * of bytes consumed -- confirm in re_stream.h).  Whenever
+ * re_stream_result() reports 1 and a callback is installed on the base,
+ * the callback is invoked with the tag "text" and the current result.
+ */
+void
+process_text(struct msgproc_stage *m, char *buf, int size)
+{
+	struct stream_re *sr = (struct stream_re *)m->state;
+	int used;
+
+	for (; size > 0; size -= used, buf += used) {
+		used = re_stream_exec(sr, buf, size);
+		if (re_stream_result(sr) != 1)
+			continue;
+
+		/* TODO: send more information back to callback... */
+		if (m->base->callback)
+			m->base->callback("text", re_stream_getresult(sr));
+	}
+}
+
+/*
+ * Tear down a text parser stage: stop the streaming regex stored in
+ * m->state, free that state, then release the stage itself.
+ */
+void
+finish_text_parser(struct msgproc_stage *m)
+{
+	struct stream_re *sr = (struct stream_re *)m->state;
+
+	re_stream_stop(sr);
+	free(sr);
+	msgproc_free(m);
+}