*/
#include <pcre.h>
+#include <string.h>
#include <err.h>
#include "re_stream.h"
#include "msgproc.h"
-static const char *text_url_pattern = "https?://[^\\s]+(?=\\s|$|\\))";
+/*
+ * URL matcher for plain-text input: an "http" or "https" scheme, a host
+ * made of letters, digits, dots and hyphens, an optional ":port", and an
+ * optional path drawn from the character set [0-9a-z_=./+&%?-].
+ *
+ * The trailing lookahead requires the URL to be followed by a line break
+ * (LF or CRLF) or by a character outside that path set; the following
+ * character itself is not part of the match.  Because '-' belongs to that
+ * set, the host class has to accept '-' as well, otherwise a host such as
+ * "my-site.com" can never satisfy the lookahead and the URL is dropped.
+ *
+ * The pattern is written in lower case and compiled with PCRE_CASELESS.
+ */
+static const char *text_url_pattern =
+ "https?://[a-z0-9.-]+(:[0-9]+)?"
+ "(/[0-9a-z_=./+&%?-]+)?"
+ "(?=\r?\n|[^0-9a-z_=./+&%?-])";
static pcre *text_url_re;
-static const char *html_url_pattern = "(?<=<a href=\")https?://[^\"]+(?=\")";
+/*
+ * URL matcher for HTML input: an http(s) URL inside <a href="..."> or
+ * <img src="...">, up to but not including the closing double quote.
+ * The match starts at the '<' of the tag, so everything up to and
+ * including the first double quote is tag markup rather than URL text.
+ */
+static const char *html_url_pattern = "<(a href|img src)=\"https?://[^\"]+(?=\")";
static pcre *html_url_re;
static void
if (initialized) return;
initialized = 1;
- text_url_re = pcre_compile(text_url_pattern, 0, &etxt, &epos, 0);
+ text_url_re = pcre_compile(text_url_pattern, PCRE_CASELESS, &etxt, &epos, 0);
if (!text_url_re) {
errx(1, "text_url_pattern compile error\n");
}
- html_url_re = pcre_compile(html_url_pattern, 0, &etxt, &epos, 0);
+ html_url_re = pcre_compile(html_url_pattern, PCRE_CASELESS, &etxt, &epos, 0);
if (!html_url_re) {
errx(1, "html_url_pattern compile error\n");
}
text_html_start(msgproc *m)
{
+ /*
+ * Allocate the regex stream state for m, start it on html_url_re and
+ * hand it to msgproc_setpriv().
+ */
struct stream_re *s = malloc(sizeof(struct stream_re));
+ /* s is passed straight to re_stream_start(); stop on allocation failure. */
+ if (!s) {
+ err(1, "malloc");
+ }
- re_stream_start(s, text_url_re, 0);
+ /* This handler scans with the HTML pattern, not the plain-text one. */
+ re_stream_start(s, html_url_re, 0);
msgproc_setpriv(m, (void *)s);
}
while (size > 0) {
n = re_stream_exec(sr, buf, size);
if (re_stream_result(sr) == 1) {
- char *url = re_stream_getresult(sr);
+ char *s, *url = re_stream_getresult(sr);
+ if (m->mp_mod->mpm_type == MSGPROC_HTML)
+ if ((s = strchr(url, '"')))
+ url = s + 1;
if (m->callback) {
m->callback(m, url, m->call_data);
}