logo

searx

Unnamed repository; edit this file 'description' to name the repository.
commit: 96d623436c9ed9cbd7a8f7dbd8852e9100a9b7b7
parent: 53c9185cbeb05026909adec164b859e09850c74e
Author: Adam Tauber <asciimoo@gmail.com>
Date:   Wed, 10 Aug 2016 11:39:00 +0200

Merge pull request #630 from davidar/doi

Add Crossref search engine and DOAI rewrite plugin

Diffstat:

searx/engines/json_engine.py | 16 +++++++++++++++-
searx/plugins/__init__.py | 4 +++-
searx/plugins/doai_rewrite.py | 31 +++++++++++++++++++++++++++++++
searx/settings.yml | 10 ++++++++++
4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py @@ -8,6 +8,14 @@ content_query = None title_query = None # suggestion_xpath = '' +# parameters for engines with paging support +# +# number of results on each page +# (only needed if the site requires not a page number, but an offset) +page_size = 1 +# number of the first page (usually 0 or 1) +first_page_num = 1 + def iterate(iterable): if type(iterable) == dict: @@ -69,8 +77,14 @@ def query(data, query_string): def request(query, params): query = urlencode({'q': query})[2:] - params['url'] = search_url.format(query=query) + + fp = {'query': query} + if paging and search_url.find('{pageno}') >= 0: + fp['pageno'] = (params['pageno'] + first_page_num - 1) * page_size + + params['url'] = search_url.format(**fp) params['query'] = query + return params diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py @@ -19,7 +19,8 @@ from searx import logger logger = logger.getChild('plugins') -from searx.plugins import (https_rewrite, +from searx.plugins import (doai_rewrite, + https_rewrite, open_results_on_new_tab, self_info, search_on_category_select, @@ -73,6 +74,7 @@ class PluginStore(): plugins = PluginStore() +plugins.register(doai_rewrite) plugins.register(https_rewrite) plugins.register(open_results_on_new_tab) plugins.register(self_info) diff --git a/searx/plugins/doai_rewrite.py b/searx/plugins/doai_rewrite.py @@ -0,0 +1,31 @@ +from flask_babel import gettext +import re +from urlparse import urlparse, parse_qsl + +regex = re.compile(r'10\.\d{4,9}/[^\s]+') + +name = gettext('DOAI rewrite') +description = gettext('Avoid paywalls by redirecting to open-access versions of publications when available') +default_on = False + + +def extract_doi(url): + match = regex.search(url.path) + if match: + return match.group(0) + for _, v in parse_qsl(url.query): + match = regex.search(v) + if match: + return match.group(0) + return None + + +def on_result(request, ctx): + doi = 
extract_doi(ctx['result']['parsed_url']) + if doi and len(doi) < 50: + for suffix in ('/', '.pdf', '/full', '/meta', '/abstract'): + if doi.endswith(suffix): + doi = doi[:-len(suffix)] + ctx['result']['url'] = 'http://doai.io/' + doi + ctx['result']['parsed_url'] = urlparse(ctx['result']['url']) + return True diff --git a/searx/settings.yml b/searx/settings.yml @@ -87,6 +87,16 @@ engines: - name : btdigg engine : btdigg shortcut : bt + + - name : crossref + engine : json_engine + paging : True + search_url : http://search.crossref.org/dois?q={query}&page={pageno} + url_query : doi + title_query : title + content_query : fullCitation + categories : science + shortcut : cr - name : currency engine : currency_convert