
[fix] engine - bing fix search, pagination, remove safesearch

jazzzooo committed 1 year ago
commit 079636c079
4 changed files with 23 additions and 116 deletions
  1. searx/engines/bing.py (+15, -99)
  2. searx/engines/bing_images.py (+3, -6)
  3. searx/engines/bing_news.py (+2, -5)
  4. searx/engines/bing_videos.py (+3, -6)

+ 15 - 99
searx/engines/bing.py

@@ -30,9 +30,8 @@ inaccuracies there too):

 from typing import TYPE_CHECKING
 import base64
-import datetime
 import re
-import uuid
+import time
 from urllib.parse import parse_qs, urlencode, urlparse
 from lxml import html
 import babel
@@ -58,17 +57,10 @@ about = {
     "results": 'HTML',
     "results": 'HTML',
 }
 }
 
 
-send_accept_language_header = True
-"""Bing tries to guess user's language and territory from the HTTP
-Accept-Language.  Optional the user can select a search-language (can be
-different to the UI language) and a region (market code)."""
-
 # engine dependent config
 categories = ['general', 'web']
 paging = True
 time_range_support = True
-safesearch = True
-safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'}  # cookie: ADLT=STRICT

 base_url = 'https://www.bing.com/search'
 """Bing (Web) search URL"""
@@ -77,105 +69,29 @@ bing_traits_url = 'https://learn.microsoft.com/en-us/bing/search-apis/bing-web-s
 """Bing (Web) search API description"""
 """Bing (Web) search API description"""
 
 
 
 
-def _get_offset_from_pageno(pageno):
-    return (pageno - 1) * 10 + 1
-
-
-def set_bing_cookies(params, engine_language, engine_region, SID):
-
-    # set cookies
-    # -----------
-
-    params['cookies']['_EDGE_V'] = '1'
-
-    # _EDGE_S: F=1&SID=3A5253BD6BCA609509B741876AF961CA&mkt=zh-tw
-    _EDGE_S = [
-        'F=1',
-        'SID=%s' % SID,
-        'mkt=%s' % engine_region.lower(),
-        'ui=%s' % engine_language.lower(),
-    ]
-    params['cookies']['_EDGE_S'] = '&'.join(_EDGE_S)
-    logger.debug("cookie _EDGE_S=%s", params['cookies']['_EDGE_S'])
+def _page_offset(pageno):
+    return (int(pageno) - 1) * 10 + 1

-    # "_EDGE_CD": "m=zh-tw",
 
 
-    _EDGE_CD = [  # pylint: disable=invalid-name
-        'm=%s' % engine_region.lower(),  # search region: zh-cn
-        'u=%s' % engine_language.lower(),  # UI: en-us
-    ]
-
-    params['cookies']['_EDGE_CD'] = '&'.join(_EDGE_CD) + ';'
-    logger.debug("cookie _EDGE_CD=%s", params['cookies']['_EDGE_CD'])
-
-    SRCHHPGUSR = [  # pylint: disable=invalid-name
-        'SRCHLANG=%s' % engine_language,
-        # Trying to set ADLT cookie here seems not to have any effect, I assume
-        # there is some age verification by a cookie (and/or session ID) needed,
-        # to disable the SafeSearch.
-        'ADLT=%s' % safesearch_types.get(params['safesearch'], 'DEMOTE'),
-    ]
-    params['cookies']['SRCHHPGUSR'] = '&'.join(SRCHHPGUSR)
-    logger.debug("cookie SRCHHPGUSR=%s", params['cookies']['SRCHHPGUSR'])
+def set_bing_cookies(params, engine_language, engine_region):
+    params['cookies']['_EDGE_CD'] = f'm={engine_region.lower()}&u={engine_language.lower()};'


 def request(query, params):
     """Assemble a Bing-Web request."""

-    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
-    engine_language = traits.get_language(params['searxng_locale'], 'en')
-
-    SID = uuid.uuid1().hex.upper()
-    CVID = uuid.uuid1().hex.upper()
+    engine_region = traits.get_region(params['searxng_locale'], 'en-us')
+    engine_language = traits.get_language(params['searxng_locale'], 'en-us')
+    set_bing_cookies(params, engine_language, engine_region)

-    set_bing_cookies(params, engine_language, engine_region, SID)
+    query_params = {'q': query, 'first': _page_offset(params.get('pageno', 1))}
+    params['url'] = f'{base_url}?{urlencode(query_params)}'

-    # build URL query
-    # ---------------
-
-    # query term
-    page = int(params.get('pageno', 1))
-    query_params = {
-        # fmt: off
-        'q': query,
-        'pq': query,
-        'cvid': CVID,
-        'qs': 'n',
-        'sp': '-1'
-        # fmt: on
-    }
+    unix_day = int(time.time() / 86400)
+    time_ranges = {'day': '1', 'week': '2', 'month': '3', 'year': f'5_{unix_day-365}_{unix_day}'}
+    if params.get('time_range') in time_ranges:
+        params['url'] += f'&filters=ex1:"ez{time_ranges[params["time_range"]]}"'

-    # page
-    if page > 1:
-        referer = base_url + '?' + urlencode(query_params)
-        params['headers']['Referer'] = referer
-        logger.debug("headers.Referer --> %s", referer)
-
-    query_params['first'] = _get_offset_from_pageno(page)
-
-    if page == 2:
-        query_params['FORM'] = 'PERE'
-    elif page > 2:
-        query_params['FORM'] = 'PERE%s' % (page - 2)
-
-    filters = ''
-    if params['time_range']:
-        query_params['filt'] = 'custom'
-
-        if params['time_range'] == 'day':
-            filters = 'ex1:"ez1"'
-        elif params['time_range'] == 'week':
-            filters = 'ex1:"ez2"'
-        elif params['time_range'] == 'month':
-            filters = 'ex1:"ez3"'
-        elif params['time_range'] == 'year':
-            epoch_1970 = datetime.date(1970, 1, 1)
-            today_no = (datetime.date.today() - epoch_1970).days
-            filters = 'ex1:"ez5_%s_%s"' % (today_no - 365, today_no)
-
-    params['url'] = base_url + '?' + urlencode(query_params)
-    if filters:
-        params['url'] = params['url'] + '&filters=' + filters
     return params


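Editor's note: the rewritten request() above drops the SID/CVID cookies, the Referer header and the pq/cvid/qs/sp/FORM query parameters; the URL is now built from the query, a 10-results-per-page offset and an optional "ez" time filter. A minimal standalone sketch of that URL assembly (build_url is a hypothetical helper added here for illustration; base_url, the offset formula and the time-range codes are taken from the diff above):

    import time
    from urllib.parse import urlencode

    base_url = 'https://www.bing.com/search'

    def _page_offset(pageno):
        # pageno 1 -> first=1, pageno 2 -> first=11, pageno 3 -> first=21, ...
        return (int(pageno) - 1) * 10 + 1

    def build_url(query, pageno=1, time_range=None):
        # mirror of the new request() body, without the params dict plumbing
        url = f"{base_url}?{urlencode({'q': query, 'first': _page_offset(pageno)})}"
        unix_day = int(time.time() / 86400)  # days since the Unix epoch
        time_ranges = {'day': '1', 'week': '2', 'month': '3', 'year': f'5_{unix_day - 365}_{unix_day}'}
        if time_range in time_ranges:
            url += f'&filters=ex1:"ez{time_ranges[time_range]}"'
        return url

    print(build_url('searxng', pageno=3, time_range='year'))
    # e.g. https://www.bing.com/search?q=searxng&first=21&filters=ex1:"ez5_19400_19765"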
@@ -236,7 +152,7 @@ def response(resp):
     except Exception as e:  # pylint: disable=broad-except
         logger.debug('result error :\n%s', e)

-    if result_len and _get_offset_from_pageno(resp.search_params.get("pageno", 0)) > result_len:
+    if result_len and _page_offset(resp.search_params.get("pageno", 0)) > result_len:
         # Avoid reading more results than avalaible.
         # For example, if there is 100 results from some search and we try to get results from 120 to 130,
         # Bing will send back the results from 0 to 10 and no error.
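Editor's note: the renamed _page_offset() also drives this guard in response(): Bing answers a request whose offset lies past the last available result by silently returning the first page again, so those results are discarded instead of being shown as a later page. A small sketch of the check (beyond_last_page is a hypothetical name; result_len stands for the total parsed from Bing's result counter):

    def _page_offset(pageno):
        return (int(pageno) - 1) * 10 + 1

    def beyond_last_page(pageno, result_len):
        # True when the requested offset points past the number of results Bing reports
        return bool(result_len) and _page_offset(pageno) > result_len

    assert beyond_last_page(13, 100)      # offset 121 > 100 results -> drop the page
    assert not beyond_last_page(5, 100)   # offset 41 <= 100 results -> keep it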

+ 3 - 6
searx/engines/bing_images.py

@@ -6,7 +6,6 @@


 from typing import TYPE_CHECKING
-import uuid
 import json
 from urllib.parse import urlencode

@@ -17,7 +16,6 @@ from searx.engines.bing import (
     set_bing_cookies,
     _fetch_traits,
 )
-from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import

 if TYPE_CHECKING:
     import logging
@@ -61,11 +59,10 @@ time_map = {
 def request(query, params):
     """Assemble a Bing-Image request."""

-    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
-    engine_language = traits.get_language(params['searxng_locale'], 'en')
+    engine_region = traits.get_region(params['searxng_locale'], 'en-us')
+    engine_language = traits.get_language(params['searxng_locale'], 'en-us')

-    SID = uuid.uuid1().hex.upper()
-    set_bing_cookies(params, engine_language, engine_region, SID)
+    set_bing_cookies(params, engine_language, engine_region)

     # build URL query
     # - example: https://www.bing.com/images/async?q=foo&first=155&count=35
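Editor's note: with the SID argument removed, the image, news and video engines all call the same simplified set_bing_cookies(), which now sets only the _EDGE_CD cookie carrying the market (m=) and UI language (u=). A quick sketch of what the shared helper produces (the params shape matches the engine request dict used above; the locale values are illustrative):

    def set_bing_cookies(params, engine_language, engine_region):
        # copied from the new bing.py above: only _EDGE_CD is set now
        params['cookies']['_EDGE_CD'] = f'm={engine_region.lower()}&u={engine_language.lower()};'

    params = {'cookies': {}}
    set_bing_cookies(params, 'en-us', 'de-DE')
    print(params['cookies'])  # {'_EDGE_CD': 'm=de-de&u=en-us;'}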

+ 2 - 5
searx/engines/bing_news.py

@@ -6,7 +6,6 @@
 # pylint: disable=invalid-name

 from typing import TYPE_CHECKING
-import uuid
 from urllib.parse import urlencode

 from lxml import html
@@ -16,7 +15,6 @@ from searx.engines.bing import (
     set_bing_cookies,
     _fetch_traits,
 )
-from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import

 if TYPE_CHECKING:
     import logging
@@ -70,10 +68,9 @@ def request(query, params):

     sxng_locale = params['searxng_locale']
     engine_region = traits.get_region(mkt_alias.get(sxng_locale, sxng_locale), traits.all_locale)
-    engine_language = traits.get_language(sxng_locale, 'en')
+    engine_language = traits.get_language(sxng_locale, 'en-us')

-    SID = uuid.uuid1().hex.upper()
-    set_bing_cookies(params, engine_language, engine_region, SID)
+    set_bing_cookies(params, engine_language, engine_region)

     # build URL query
     #

+ 3 - 6
searx/engines/bing_videos.py

@@ -5,7 +5,6 @@
 # pylint: disable=invalid-name

 from typing import TYPE_CHECKING
-import uuid
 import json
 from urllib.parse import urlencode

@@ -16,7 +15,6 @@ from searx.engines.bing import (
     set_bing_cookies,
     _fetch_traits,
 )
-from searx.engines.bing import send_accept_language_header  # pylint: disable=unused-import

 if TYPE_CHECKING:
     import logging
@@ -60,11 +58,10 @@ time_map = {
 def request(query, params):
     """Assemble a Bing-Video request."""

-    engine_region = traits.get_region(params['searxng_locale'], 'en-US')
-    engine_language = traits.get_language(params['searxng_locale'], 'en')
+    engine_region = traits.get_region(params['searxng_locale'], 'en-us')
+    engine_language = traits.get_language(params['searxng_locale'], 'en-us')

-    SID = uuid.uuid1().hex.upper()
-    set_bing_cookies(params, engine_language, engine_region, SID)
+    set_bing_cookies(params, engine_language, engine_region)

     # build URL query
     #