logo

searx

My custom branch(es) on searx, a meta-search engine.
git clone https://hacktivis.me/git/searx.git
commit: 07f83cab224d2ddf1f7fd8b544f2f2d6679c2416
parent fe35c86c947f503ae2e1e7633d3355204e48ae12
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Tue,  4 Mar 2014 15:06:27 +0100

Merge remote-tracking branch 'asciimoo/master'

Diffstat:

M searx/engines/google_news.py | 2 +-
M searx/engines/yahoo.py       | 2 +-
M searx/engines/yahoo_news.py  | 2 +-
M searx/utils.py               | 7 +++++--
4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py
@@ -6,7 +6,7 @@ from json import loads
 categories = ['news']

 url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
+search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa

 paging = True
 language_support = True
diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py
@@ -35,7 +35,7 @@ def response(resp):

     for result in dom.xpath(results_xpath):
         url_string = extract_url(result.xpath(url_xpath), search_url)
-        start = url_string.find('/RU=')+4
+        start = url_string.find('http', url_string.find('/RU=')+1)
         end = url_string.rfind('/RS')
         url = unquote(url_string[start:end])
         title = extract_text(result.xpath(title_xpath)[0])
diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py
@@ -35,7 +35,7 @@ def response(resp):

     for result in dom.xpath(results_xpath):
         url_string = extract_url(result.xpath(url_xpath), search_url)
-        start = url_string.find('/RU=')+4
+        start = url_string.find('http', url_string.find('/RU=')+1)
         end = url_string.rfind('/RS')
         url = unquote(url_string[start:end])
         title = extract_text(result.xpath(title_xpath)[0])
diff --git a/searx/utils.py b/searx/utils.py
@@ -4,12 +4,15 @@ import csv
 from codecs import getincrementalencoder
 import cStringIO
 import re
+from random import choice

+ua_versions = ('26.0', '27.0', '28.0')
+ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
+ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"

 def gen_useragent():
     # TODO
-    ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
-    return ua
+    return ua.format(os=choice(ua_os), version=choice(ua_versions))


 def highlight_content(content, query):