From: chris mikkelson
Date: Mon, 12 Apr 2010 18:24:28 +0000 (-0500)
Subject: Update HTML URL regexps to account for attributes before href/src
X-Git-Url: https://git.mikk.net/?a=commitdiff_plain;h=39606221d5d9e129124ec3ad81cbcbd6e75b26d1;p=liburl

Update HTML URL regexps to account for attributes before href/src
---

diff --git a/text.c b/text.c
index 031ab82..b5c3d02 100644
--- a/text.c
+++ b/text.c
@@ -28,7 +28,7 @@ static const char *text_url_pattern =
 	"(/[0-9a-z_=./+&%?-]*)?"
 	"(?=\r?\n|[^0-9a-z_=./+&%?-])";
 static pcre *text_url_re;
-static const char *html_url_pattern = "<(a href|img src)=\"https?://[^\"]+(?=\")";
+static const char *html_url_pattern = "<(a ([^>]*\\s)?href|img ([^>]*\\s)?src)=\"https?://[^\"]+(?=\")";
 static pcre *html_url_re;
 
 static void
@@ -75,7 +75,7 @@ text_process(msgproc *m, char *buf, size_t size)
 		if (re_stream_result(sr) == 1) {
 			char *s, *url = re_stream_getresult(sr);
 			if (m->mp_mod->mpm_type == MSGPROC_HTML)
-				if ((s = strchr(url, '"')))
+				if ((s = strrchr(url, '"')))
 					url = s + 1;
 			if (m->callback) {
 				m->callback(m, url, m->call_data);