logo

searx

My custom branch(es) of searx, a meta-search engine. git clone https://hacktivis.me/git/searx.git
commit: 144f89bf785408a193d09f659a5442032c06de74
parent cdf74fe563f24facec5bb487b5b3c6f599b08934
Author: Thomas Pointhuber <thomas.pointhuber@gmx.at>
Date:   Mon,  1 Sep 2014 15:10:05 +0200

add comments to google-engines

Diffstat:

M searx/engines/google.py        | 28 ++++++++++++++++++++++++----
M searx/engines/google_images.py | 31 ++++++++++++++++++++++++++-----
M searx/engines/google_news.py   | 31 +++++++++++++++++++++++++------
3 files changed, 75 insertions(+), 15 deletions(-)

diff --git a/searx/engines/google.py b/searx/engines/google.py @@ -1,37 +1,57 @@ -#!/usr/bin/env python +## Google (Web) +# +# @website https://www.google.com +# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated! +# +# @using-api yes +# @results JSON +# @stable yes (but deprecated) +# @parse url, title, content from urllib import urlencode from json import loads +# engine dependent config categories = ['general'] +paging = True +language_support = True +# search-url url = 'https://ajax.googleapis.com/' search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa -paging = True -language_support = True - +# do search-request def request(query, params): offset = (params['pageno'] - 1) * 8 + language = 'en-US' if params['language'] != 'all': language = params['language'].replace('_', '-') + params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), language=language) + return params +# get response from search-request def response(resp): results = [] + search_res = loads(resp.text) + # return empty array if there are no results if not search_res.get('responseData', {}).get('results'): return [] + # parse results for result in search_res['responseData']['results']: + # append result results.append({'url': result['unescapedUrl'], 'title': result['titleNoFormatting'], 'content': result['content']}) + + # return results return results diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py @@ -1,37 +1,58 @@ -#!/usr/bin/env python +## Google (Images) +# +# @website https://www.google.com +# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated! 
+# +# @using-api yes +# @results JSON +# @stable yes (but deprecated) +# @parse url, title, img_src from urllib import urlencode from json import loads +# engine dependent config categories = ['images'] +paging = True +# search-url url = 'https://ajax.googleapis.com/' search_url = url + 'ajax/services/search/images?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa -paging = True +# do search-request def request(query, params): offset = (params['pageno'] - 1) * 8 + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset) + return params +# get response from search-request def response(resp): results = [] + search_res = loads(resp.text) - if not search_res.get('responseData'): - return [] - if not search_res['responseData'].get('results'): + + # return empty array if there are no results + if not search_res.get('responseData', {}).get('results'): return [] + + # parse results for result in search_res['responseData']['results']: href = result['originalContextUrl'] title = result['title'] if not result['url']: continue + + # append result results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'}) + + # return results return results diff --git a/searx/engines/google_news.py b/searx/engines/google_news.py @@ -1,43 +1,62 @@ -#!/usr/bin/env python +## Google (News) +# +# @website https://www.google.com +# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated! 
+# +# @using-api yes +# @results JSON +# @stable yes (but deprecated) +# @parse url, title, content, publishedDate from urllib import urlencode from json import loads from dateutil import parser +# search-url categories = ['news'] +paging = True +language_support = True +# engine dependent config url = 'https://ajax.googleapis.com/' search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa -paging = True -language_support = True - +# do search-request def request(query, params): offset = (params['pageno'] - 1) * 8 + language = 'en-US' if params['language'] != 'all': language = params['language'].replace('_', '-') + params['url'] = search_url.format(offset=offset, query=urlencode({'q': query}), language=language) + return params +# get response from search-request def response(resp): results = [] + search_res = loads(resp.text) + # return empty array if there are no results if not search_res.get('responseData', {}).get('results'): return [] + # parse results for result in search_res['responseData']['results']: - -# Mon, 10 Mar 2014 16:26:15 -0700 + # parse publishedDate publishedDate = parser.parse(result['publishedDate']) + # append result results.append({'url': result['unescapedUrl'], 'title': result['titleNoFormatting'], 'publishedDate': publishedDate, 'content': result['content']}) + + # return results return results