* All Rights Reserved, for now.
*/
+#include <stdio.h> /* XXX -- debugging */
#include <stdlib.h>
#include <string.h>
+#include <pcre.h>
+#include "re_stream.h"
#include "msgproc.h"
/* needs to be published for other callers */
static msgproc_module *nextmod;
struct multipart_state {
- char *boundary;
- int blen;
- char *line;
- int l;
+ struct stream_re *boundre; /* streaming matcher state; allocated in setboundary(), fed by multipart_process() */
+ pcre *bre; /* boundary pattern compiled by pcre_compile() in setboundary() */
int state;
};
+/*
+ * setboundary: data callback that installs the multipart boundary matcher.
+ *
+ * Ignores every event except MULTIPART_BOUNDARY.  For that event, `data`
+ * (`size` bytes, not NUL-terminated) is the boundary string; it is turned
+ * into the pattern ^--<escaped boundary>(--)?$, compiled, and wrapped in a
+ * stream_re stored in the module's private state.
+ *
+ * On any failure the private state is left exactly as it was: the compiled
+ * pattern is held in a local until everything has succeeded, so no freed
+ * pointer is ever left behind in mps->bre for the destructor to free again.
+ */
static void
setboundary(msgproc *m, int type, void *data, size_t size)
{
+ char *boundpat = 0;
+ char *b, *s;
+ const char *etxt;
+ int epos;
+ pcre *re = 0;
+ struct stream_re *sre = 0;
struct multipart_state *mps = msgproc_getpriv(m);
+
if (type != MULTIPART_BOUNDARY) return;
- if (mps) {
- mps->boundary = malloc(size + 2);
- if (!mps->boundary) return;
- mps->boundary[0] = mps->boundary[1] = '-';
- memcpy(mps->boundary + 2, data, size);
- mps->blen = size + 2;
+ if (!mps) return;
+
+ /* worst case every boundary byte needs a backslash, hence 2*size */
+ boundpat = malloc(2*size + strlen("^--(--)?$") + 1);
+ if (!boundpat) goto fail;
+
+ strcpy(boundpat, "^--");
+ s = boundpat + strlen("^--");
+ /* escape PCRE pattern metacharacters in boundary */
+ for (b = (char *)data; b < (char *)data + size; b++) {
+ switch(*b) {
+ case '(':
+ case ')':
+ case '?':
+ case '.':
+ case '+': /* pattern metacharacters allowed by RFC 1341 */
+ case '\\':
+ case '*': /* the rest are not legal boundary characters, but a */
+ case '[': /* malformed header must not be able to inject */
+ case ']': /* pattern syntax */
+ case '{':
+ case '}':
+ case '^':
+ case '$':
+ case '|':
+ *s++ = '\\';
+ /* FALLTHROUGH */
+ default:
+ *s++ = *b;
+ }
}
+ strcpy(s, "(--)?$");
+
+ re = pcre_compile(boundpat, PCRE_MULTILINE, &etxt, &epos, 0);
+ if (!re) goto fail;
+
+ /* fprintf(stderr, "multipart_init: boundpat = %s\n", boundpat); */
+
+ free(boundpat); /* no longer needed */
+ boundpat = 0;
+
+ sre = malloc(sizeof(struct stream_re));
+ if (!sre) goto fail;
+
+ re_stream_start(sre, re, 0);
+
+ /* publish only once both pieces exist */
+ mps->bre = re;
+ mps->boundre = sre;
+ return;
+
+fail: free(boundpat);
+ if (re) pcre_free(re);
}
-#define STATE_INMATCH 0
-#define STATE_MATCH 1
-#define STATE_MATCHEOL 2
-#define STATE_NOMATCH 3
-#define STATE_NOMATCHEOL 4
-#define STATE_MATCHEND 5
-#define STATE_DONE 6
+#define STATE_PREAMBLE 0 /* no boundary matched yet; input is consumed without being forwarded */
+#define STATE_PARTS 1 /* inside a body part; input is forwarded to the next msgproc */
+#define STATE_END 2 /* boundary ending in "--" matched; multipart_process stops looping */
+/*
+ * multipart_process: split a chunk of multipart body data at boundary lines.
+ *
+ * buf/len is the next chunk of the stream.  The chunk is scanned with the
+ * boundary matcher installed by setboundary():
+ *   STATE_PREAMBLE - data is dropped until the first boundary matches, then
+ *                    a downstream msgproc is created and started;
+ *   STATE_PARTS    - data is forwarded downstream; a full boundary match
+ *                    finishes the current part and either starts a new one
+ *                    or, if the match ends in "--", moves to STATE_END;
+ *   STATE_END      - nothing further is processed.
+ * A boundary that straddles two chunks is carried across calls by the
+ * matcher's partial-match state; pmatch holds a copy of the text withheld
+ * from the previous chunk.
+ *
+ * Does nothing if no private state or no boundary matcher is installed.
+ */
static void
multipart_process(msgproc *m, char *buf, size_t len)
{
struct multipart_state *mps = msgproc_getpriv(m);
+ char *s = buf;
+ size_t p, l = len;
+ size_t ml;
+ int rc;
msgproc *next;
- char *s, *t;
- int r, n;
-
- /* r = number of characters in current line which match
- boundary and resided in previous buffer.
-
- n = number of characters in current line which match
- boundary and reside in current buffer.
-
- mps->l = number of characters in boundary matched by
- current line (regardless of which buffer(s)).
-
- s = current character in buffer.
-
- t = beginning of current part in current buffer.
- */
-
- if (!mps) return;
- if (mps->state == STATE_DONE) return;
- if (!mps->boundary) return;
- r = mps->l;
- next = msgproc_next(m);
-
- for (s = t = buf; s < buf + len; s++) {
-
- switch (mps->state) {
- case STATE_INMATCH:
- if (mps->boundary[mps->l++] != *s) {
- mps->state = STATE_NOMATCH;
- n = mps->l = 0;
- continue;
+ char *pmatch = 0;
+
+ /* nothing can be matched until setboundary() has installed a matcher */
+ if (!mps || !mps->boundre) return;
+
+ /* fprintf(stderr, "multipart_process: %d chars\n", len); */
+
+ while (l > 0 && mps->state != STATE_END) {
+ /* re_stream_exec needs to be tweaked:
+ - set state to nomatch and process to
+ end of match if partial
+ match is not at end of input.
+ - set state to nomatch and process to
+ beginning of match if previous state
+ was partial match and new match begins
+ after beginning of input
+ */
+ if (re_stream_result(mps->boundre) == -1)
+ pmatch = strdup(re_stream_getresult(mps->boundre));
+
+ /* fprintf(stderr, "multipart_process: s=%p, l=%d, state = %d\n",
+ s, l, mps->state);
+ fwrite(s, p, 1, stderr);
+ fprintf(stderr,"\n");
+ if (pmatch)
+ fprintf(stderr, "multipart_process pmatch = '%s'\n",
+ pmatch); */
+ /* re_stream_exec returns int and signals failure with -1; test
+ the signed value before widening it, otherwise the error
+ would turn into a huge offset */
+ rc = re_stream_exec(mps->boundre, s, l);
+ if (rc < 0) break;
+ p = (size_t)rc;
+ /* fprintf(stderr, "multipart_process: p = %d, result = %d\n",
+ p, re_stream_result(mps->boundre)); */
+
+ switch(mps->state) {
+ case STATE_PREAMBLE:
+ if (re_stream_result(mps->boundre) == 1) {
+ s += p;
+ l -= p;
+ mps->state = STATE_PARTS;
+ next = msgproc_create(m, nextmod);
+ msgproc_start(next);
+ } else {
+ s += l;
+ l = 0;
}
- n++;
- if (mps->l < mps->blen) continue;
- mps->state = STATE_MATCH;
- if (*s == '\r') mps->state = STATE_MATCHEOL;
- if (*s == '-') mps->state = STATE_MATCHEND;
- continue;
- case STATE_MATCH:
- if (*s == '\r') mps->state = STATE_MATCHEOL;
- continue;
- case STATE_NOMATCH:
- if (*s == '\r') mps->state = STATE_NOMATCHEOL;
- continue;
- case STATE_MATCHEOL:
- if (*s == '\n') {
- mps->state = STATE_INMATCH;
- n = mps->l;
- mps->l = 0;
+ break;
+ case STATE_PARTS:
+ next = msgproc_next(m);
+ if (re_stream_result(mps->boundre) == 0) {
+ /* NOTE(review): this consumes the whole remainder even
+ when re_stream_exec returned p < l, and does not
+ forward pmatch -- confirm that is intended */
+ msgproc_process(next, s, l);
+ s += l;
+ l = 0;
break;
+ } else if (re_stream_result(mps->boundre) == -1) {
+ ml = strlen(re_stream_getresult(mps->boundre));
+ if (p > ml)
+ msgproc_process(next, s, p - ml);
+ s += l;
+ l = 0;
+ break;
+ } else { /* full match */
+ char *bm = re_stream_getresult(mps->boundre);
+ ml = strlen(bm);
+ if (p > ml) {
+ if (pmatch)
+ msgproc_process(next, pmatch,
+ strlen(pmatch));
+ msgproc_process(next, s, p - ml);
+ }
+ msgproc_finish(next);
+ /* check for -- at end of match */
+ /* fprintf(stderr, "endofmatch = %s\n",
+ bm + ml - 2); */
+ if (!strcmp("--", bm + ml - 2)) {
+ mps->state = STATE_END;
+ s += l;
+ l = 0;
+ break;
+ }
+ next = msgproc_create(m, nextmod);
+ msgproc_start(next);
+ s += p;
+ l -= p;
}
- mps->state = STATE_MATCH;
- continue;
- case STATE_NOMATCHEOL:
- if (*s == '\n') mps->state = STATE_INMATCH;
- mps->state = STATE_NOMATCH;
- continue;;
- case STATE_MATCHEND:
- if (*s != '-') {
- mps->state = STATE_MATCH;
- continue;
- }
- mps->state = STATE_DONE;
- n = mps->l;
- mps->l = 0;
- break;
- case STATE_DONE:
- t=s;
- continue;
}
-
- /* "break" above lands down here, continue skips to next char */
- /* send stuff downstream. (including a chunk of
- boundary if partial match happened at end
- of previous segment) */
- if (r) {
- msgproc_process(next, mps->boundary, r);
- r = 0;
- }
- msgproc_process(next, t, s - t - n);
- n = 0;
-
- if (mps->state == STATE_DONE) return;
+ if (pmatch) free(pmatch);
+ pmatch = 0;
}
-
- /* reached end of buffer. Process what we have. */
- msgproc_process(next, t, s - t - n);
+ free(pmatch); /* still set only when the loop was left on a matcher error */
}
void
{
struct multipart_state *mps = msgproc_getpriv(m);
if (mps) {
- free(mps->boundary);
+ if (mps->boundre) {
+ re_stream_stop(mps->boundre);
+ free(mps->boundre);
+ }
+ if (mps->bre) pcre_free(mps->bre);
}
free(mps);
msgproc_free(m);
*/
/* Stream-based regular expression matching implementation */
+#include <stdio.h> /* XXX -- debugging */
#include <pcre.h>
#include <strings.h>
#include "re_stream.h"
+/*
+ * re_stream_exec: run the stream's pattern over the next chunk of input.
+ *
+ * sub/slen is the next chunk.  If the previous call ended in a partial
+ * match (sre->state == -1) the DFA match is restarted, otherwise the
+ * accumulated result is cleared and a fresh match is attempted.
+ *
+ * On return sre->state is 1 for a complete match, -1 for a partial match
+ * reaching the end of the chunk, and 0 for no match; matched text is
+ * appended to sre->result.
+ *
+ * Returns the offset in `sub` the caller should continue from: slen when
+ * nothing matched, the end of the match otherwise, or the start of a new
+ * match when a pending partial match turned out not to continue.
+ * Returns -1 if the result buffer could not be grown (or was lost by an
+ * earlier failed growth).
+ */
int
re_stream_exec(struct stream_re *sre, char *sub, int slen)
{
- int match, flags, newlen, end = slen;
+ int match, flags, newlen, start, end;
+
+ /* result is only NULL after a failed reallocf below; refuse to
+ write through it */
+ if (!sre->result) return -1;
+
if (sre->state == -1) {
flags = PCRE_PARTIAL | PCRE_DFA_RESTART;
- sre->state = 1;
} else {
flags = PCRE_PARTIAL;
sre->state = 0;
+ sre->rlen = 0;
+ sre->result[0] = 0;
}
match = pcre_dfa_exec(sre->re, 0, sub, slen, 0, sre->flags | flags,
- sre->ovec, sizeof(sre->ovec),
- sre->wspace, sizeof(sre->wspace));
+ /* PCRE wants element counts here, not byte sizes.
+ NOTE(review): assumes ovec and wspace are int arrays inside
+ struct stream_re -- confirm in re_stream.h */
+ sre->ovec, (int)(sizeof(sre->ovec) / sizeof(sre->ovec[0])),
+ sre->wspace, (int)(sizeof(sre->wspace) / sizeof(sre->wspace[0])));
+ /* fprintf(stderr, "in (%d) = %s\n", slen, sub);
+ fprintf(stderr, "pcre_dfa_exec: match = %d\n", match); */
- if (match > 0 || match == PCRE_ERROR_PARTIAL) {
- end = sre->ovec[1];
- sre->state = 1;
- newlen = sre->rlen + sre->ovec[1] - sre->ovec[0];
- if (newlen >= sre->ralloc) {
- sre->ralloc += RALLOC_INIT;
- sre->result = reallocf(sre->result, sre->ralloc);
- if (!sre->result) return -1;
- }
- pcre_copy_substring(sub, sre->ovec, sizeof(sre->ovec),
- 0, sre->result + sre->rlen,
- sre->ralloc - sre->rlen);
- sre->rlen = newlen;
-
- if (sre->ovec[1] == slen && match == PCRE_ERROR_PARTIAL)
- sre->state = -1;
+ if (match < 0 && match != PCRE_ERROR_PARTIAL) {
+ sre->result[0] = sre->rlen = 0;
+ sre->state = 0;
+ return slen;
+ }
+
+ start = sre->ovec[0];
+ end = sre->ovec[1];
+
+ if (match == PCRE_ERROR_PARTIAL && end < slen) {
+ /* partial match ending before end of input => no match */
+ sre->result[0] = sre->rlen = 0;
+ sre->state = 0;
+ return end;
}
+
+ if (sre->state == -1 && start > 0) {
+ /* new match begins after buffer start, previous
+ buffer had partial match. set no match and
+ return start of next match to inform caller */
+ sre->result[0] = sre->rlen = 0;
+ sre->state = 0;
+ return start;
+ }
+
+ newlen = sre->rlen + end - start;
+ if (newlen >= sre->ralloc) {
+ /* a single RALLOC_INIT step may not be enough for a long
+ match; keep stepping until the text plus its NUL fits */
+ while (newlen >= sre->ralloc)
+ sre->ralloc += RALLOC_INIT;
+ sre->result = reallocf(sre->result, sre->ralloc);
+ if (!sre->result) {
+ /* reallocf released the old buffer; drop the lengths
+ that described it */
+ sre->rlen = sre->ralloc = 0;
+ sre->state = 0;
+ return -1;
+ }
+ }
+ pcre_copy_substring(sub, sre->ovec, sizeof(sre->ovec),
+ 0, sre->result + sre->rlen,
+ sre->ralloc - sre->rlen);
+ sre->rlen = newlen;
+
+ if (match == PCRE_ERROR_PARTIAL)
+ sre->state = -1;
+ else
+ sre->state = 1;
+
return end;
}