|
@@ -1,12 +1,28 @@
|
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
# lint: pylint
|
|
|
-"""Google (Web)
|
|
|
+"""This is the implementation of the google WEB engine. Some of these
|
|
|
+implementations are shared by other engines:
|
|
|
|
|
|
-For detailed description of the *REST-full* API see: `Query Parameter
|
|
|
-Definitions`_.
|
|
|
+- :ref:`google images engine`
|
|
|
+- :ref:`google news engine`
|
|
|
+- :ref:`google videos engine`
|
|
|
+
|
|
|
+The google WEB engine itself has a special setup option:
|
|
|
+
|
|
|
+.. code:: yaml
|
|
|
+
|
|
|
+ - name: google
|
|
|
+ ...
|
|
|
+ use_mobile_ui: true
|
|
|
+
|
|
|
+``use_mobile_ui``: (default: ``true``)
|
|
|
+ Enables the use of the *mobile endpoint* to bypass the google blocking (see
|
|
|
+ :issue:`159`). On the mobile UI of Google Search, the button :guilabel:`More
|
|
|
+ results` is not affected by Google rate limiting and we can still do requests
|
|
|
+ while actively blocked by the original Google search. By activating
|
|
|
+ ``use_mobile_ui`` this behavior is simulated by adding the parameter
|
|
|
+ ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`.
|
|
|
|
|
|
-.. _Query Parameter Definitions:
|
|
|
- https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
|
|
|
"""
|
|
|
|
|
|
# pylint: disable=invalid-name, missing-function-docstring
|
|
@@ -137,8 +153,9 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a'
|
|
|
def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|
|
"""Composing various language properties for the google engines.
|
|
|
|
|
|
- This function is called by the various google engines (google itself,
|
|
|
- google-images, -news, -scholar, -videos).
|
|
|
+ This function is called by the various google engines (:ref:`google web
|
|
|
+ engine`, :ref:`google images engine`, :ref:`google news engine` and
|
|
|
+ :ref:`google videos engine`).
|
|
|
|
|
|
:param dict params: request parameters of the engine
|
|
|
|
|
@@ -146,7 +163,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|
|
:py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>`
|
|
|
|
|
|
:param dict custom_aliases: custom aliases for non standard language codes
|
|
|
- (used when calling :py:func:`searx.utils.match_language)
|
|
|
+ (used when calling :py:func:`searx.utils.match_language`)
|
|
|
|
|
|
:param bool supported_any_language: When a language is not specified, the
|
|
|
language interpretation is left up to Google to decide how the search
|
|
@@ -159,7 +176,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language):
|
|
|
Py-Dictionary with the key/value pairs:
|
|
|
|
|
|
language:
|
|
|
- Return value from :py:func:`searx.utils.match_language
|
|
|
+ Return value from :py:func:`searx.utils.match_language`
|
|
|
|
|
|
country:
|
|
|
The country code (e.g. US, AT, CA, FR, DE ..)
|
|
@@ -270,8 +287,7 @@ def request(query, params):
|
|
|
additional_parameters = {}
|
|
|
if use_mobile_ui:
|
|
|
additional_parameters = {
|
|
|
- 'asearch': "arc",
|
|
|
- 'async': 'arc_id:srp_510,ffilt:all,ve_name:MoreResultsContainer,next_id:srp_5,use_ac:true,_id:arc-srp_510,_pms:qs,_fmt:pc' # pylint: disable=line-too-long
|
|
|
+ 'async': 'use_ac:true,_fmt:pc',
|
|
|
}
|
|
|
|
|
|
# https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
|
|
@@ -312,9 +328,10 @@ def response(resp):
|
|
|
dom = html.fromstring(resp.text)
|
|
|
|
|
|
# results --> answer
|
|
|
- answer = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]//text()')
|
|
|
- if answer:
|
|
|
- results.append({'answer': ' '.join(answer)})
|
|
|
+ answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]')
|
|
|
+ if answer_list:
|
|
|
+ answer_list = [_.xpath("normalize-space()") for _ in answer_list]
|
|
|
+ results.append({'answer': ' '.join(answer_list)})
|
|
|
else:
|
|
|
logger.debug("did not find 'answer'")
|
|
|
|