logo

searx

My custom branch(es) of searx, a meta-search engine
commit: 0cb55ddfde3a2687ca7a647ac95ffe484e12471b
parent: b9d4c0523e8d6eab81658d77f8213b39f9b28f17
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Thu, 22 Mar 2018 14:41:42 +0100

Merge pull request #1136 from kvch/add-findx-general

Add findx engine

Diffstat:

A searx/engines/findx.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M searx/query.py | 15 +++++++++++----
M searx/search.py | 9 +++++++--
M searx/settings.yml | 18 ++++++++++++++++++
4 files changed, 151 insertions(+), 6 deletions(-)

diff --git a/searx/engines/findx.py b/searx/engines/findx.py @@ -0,0 +1,115 @@ +""" +FindX (General, Images, Videos) + +@website https://www.findx.com +@provide-api no +@using-api no +@results HTML +@stable no +@parse url, title, content, embedded, img_src, thumbnail_src +""" + +from dateutil import parser +from json import loads +import re + +from lxml import html + +from searx import logger +from searx.engines.xpath import extract_text +from searx.engines.youtube_noapi import base_youtube_url, embedded_url +from searx.url_utils import urlencode + + +paging = True +results_xpath = '//script[@id="initial-state"]' +search_url = 'https://www.findx.com/{category}?{q}' +type_map = { + 'none': 'web', + 'general': 'web', + 'images': 'images', + 'videos': 'videos', +} + + +def request(query, params): + params['url'] = search_url.format( + category=type_map[params['category']], + q=urlencode({ + 'q': query, + 'page': params['pageno'] + }) + ) + return params + + +def response(resp): + dom = html.fromstring(resp.text) + results_raw_json = dom.xpath(results_xpath) + results_json = loads(extract_text(results_raw_json)) + + if len(results_json['web']['results']) > 0: + return _general_results(results_json['web']['results']) + + if len(results_json['images']['results']) > 0: + return _images_results(results_json['images']['results']) + + if len(results_json['video']['results']) > 0: + return _videos_results(results_json['video']['results']) + + return [] + + +def _general_results(general_results): + results = [] + for result in general_results: + results.append({ + 'url': result['url'], + 'title': result['title'], + 'content': result['sum'], + }) + return results + + +def _images_results(image_results): + results = [] + for result in image_results: + results.append({ + 'url': result['sourceURL'], + 'title': result['title'], + 'content': result['source'], + 'thumbnail_src': _extract_url(result['assets']['thumb']['url']), + 'img_src': 
_extract_url(result['assets']['file']['url']), + 'template': 'images.html', + }) + return results + + +def _videos_results(video_results): + results = [] + for result in video_results: + if not result['kind'].startswith('youtube'): + logger.warn('Unknown video kind in findx: {}'.format(result['kind'])) + continue + + description = result['snippet']['description'] + if len(description) > 300: + description = description[:300] + '...' + + results.append({ + 'url': base_youtube_url + result['id'], + 'title': result['snippet']['title'], + 'content': description, + 'thumbnail': _extract_url(result['snippet']['thumbnails']['default']['url']), + 'publishedDate': parser.parse(result['snippet']['publishedAt']), + 'embedded': embedded_url.format(videoid=result['id']), + 'template': 'videos.html', + }) + return results + + +def _extract_url(url): + matching = re.search('(/https?://[^)]+)', url) + if matching: + return matching.group(0)[1:] + return '' diff --git a/searx/query.py b/searx/query.py @@ -107,14 +107,21 @@ class RawTextQuery(object): # check if prefix is equal with engine shortcut if prefix in engine_shortcuts: parse_next = True - self.engines.append({'category': 'none', - 'name': engine_shortcuts[prefix]}) + engine_name = engine_shortcuts[prefix] + if engine_name in engines: + for engine_category in engines[engine_name].categories: + self.engines.append({'category': engine_category, + 'name': engine_name, + 'from_bang': True}) # check if prefix is equal with engine name elif prefix in engines: parse_next = True - self.engines.append({'category': 'none', - 'name': prefix}) + if prefix in engines: + for engine_category in engines[engine_name].categories: + self.engines.append({'category': engine_category, + 'name': engine_name, + 'from_bang': True}) # check if prefix is equal with categorie name elif prefix in categories: diff --git a/searx/search.py b/searx/search.py @@ -258,8 +258,13 @@ def get_search_query_from_webapp(preferences, form): # if engines are 
calculated from query, # set categories by using that informations if query_engines and raw_text_query.specific: - query_categories = list(set(engine['category'] - for engine in query_engines)) + additional_categories = set() + for engine in query_engines: + if 'from_bang' in engine and engine['from_bang']: + additional_categories.add('none') + else: + additional_categories.add(engine['category']) + query_categories = list(additional_categories) # otherwise, using defined categories to # calculate which engines should be used diff --git a/searx/settings.yml b/searx/settings.yml @@ -218,6 +218,24 @@ engines: shortcut : fd disabled : True + - name : findx + engine : findx + shortcut : fx + categories : general + disabled : True + + - name : findx images + engine : findx + shortcut : fxi + categories : images + disabled : True + + - name : findx videos + engine : findx + shortcut : fxv + categories : videos + disabled : True + - name : flickr categories : images shortcut : fl