From 39606221d5d9e129124ec3ad81cbcbd6e75b26d1 Mon Sep 17 00:00:00 2001 From: chris mikkelson Date: Mon, 12 Apr 2010 13:24:28 -0500 Subject: [PATCH] Update HTML URL regexps to account for attributes before href/src --- text.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text.c b/text.c index 031ab82..b5c3d02 100644 --- a/text.c +++ b/text.c @@ -28,7 +28,7 @@ static const char *text_url_pattern = "(/[0-9a-z_=./+&%?-]*)?" "(?=\r?\n|[^0-9a-z_=./+&%?-])"; static pcre *text_url_re; -static const char *html_url_pattern = "<(a href|img src)=\"https?://[^\"]+(?=\")"; +static const char *html_url_pattern = "<(a ([^>]*\\s)?href|img ([^>]*\\s)?src)=\"https?://[^\"]+(?=\")"; static pcre *html_url_re; static void @@ -75,7 +75,7 @@ text_process(msgproc *m, char *buf, size_t size) if (re_stream_result(sr) == 1) { char *s, *url = re_stream_getresult(sr); if (m->mp_mod->mpm_type == MSGPROC_HTML) - if ((s = strchr(url, '"'))) + if ((s = strrchr(url, '"'))) url = s + 1; if (m->callback) { m->callback(m, url, m->call_data); -- 2.50.1