logo

searx

My custom branch(es) of searx, a meta-search engine
git clone https://hacktivis.me/git/searx.git
commit: 9460750feab250d383080342a7bb0a5fe2e2392d
parent 678a80f043d2f57f059236b574cc29fab4f70fe8
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Tue,  2 Sep 2014 20:14:52 +0200

fix twitter engine and add comments

* add language-support
* add comments
* little refactoring

Diffstat:

M searx/engines/twitter.py | 37 +++++++++++++++++++++++++++++++++++--
1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
@@ -1,30 +1,63 @@
+## Twitter (Social media)
+#
+# @website https://www.bing.com/news
+# @provide-api yes (https://dev.twitter.com/docs/using-search)
+#
+# @using-api no
+# @results HTML (using search portal)
+# @stable no (HTML can change)
+# @parse url, title, content
+#
+# @todo publishedDate
+
 from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
 
+# engine dependent config
 categories = ['social media']
+language_support = True
 
+# search-url
 base_url = 'https://twitter.com/'
 search_url = base_url+'search?'
+
+# specific xpath variables
+results_xpath = '//li[@data-item-type="tweet"]'
+link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
 
 
+# do search-request
 def request(query, params):
     params['url'] = search_url + urlencode({'q': query})
+
+    # set language if specified
+    if params['language'] != 'all':
+        params['cookies']['lang'] = params['language'].split('_')[0]
+
     return params
 
 
+# get response from search-request
 def response(resp):
     results = []
+
     dom = html.fromstring(resp.text)
-    for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
-        link = tweet.xpath('.//small[@class="time"]//a')[0]
+
+    # parse results
+    for tweet in dom.xpath(results_xpath):
+        link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
         content = escape(''.join(tweet.xpath(content_xpath)))
+
+        # append result
         results.append({'url': url,
                         'title': title,
                         'content': content})
+
+    # return results
     return results