Browse Source

[feat] engine: implementation of geizhals.de

Bnyro 9 months ago
parent
commit
84abab0808
3 changed files with 110 additions and 0 deletions
  1. 8 0
      docs/dev/engines/online/geizhals.rst
  2. 97 0
      searx/engines/geizhals.py
  3. 5 0
      searx/settings.yml

+ 8 - 0
docs/dev/engines/online/geizhals.rst

@@ -0,0 +1,8 @@
+.. _gitea geizhals:
+
+========
+Geizhals
+========
+
+.. automodule:: searx.engines.geizhals
+  :members:

+ 97 - 0
searx/engines/geizhals.py

@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Geizhals is a German website to compare the price of a product on the
+most common German shopping sites and find the lowest price.
+
+The sorting of the search results can be influenced by the following additions
+to the search term:
+
+``asc`` or ``price``
+  To sort by price in ascending order.
+
+``desc``
+  To sort by price in descending order.
+
+"""
+
+import re
+
+from urllib.parse import urlencode
+from lxml import html
+
+from searx.utils import eval_xpath, eval_xpath_list, extract_text
+
+about = {
+    'website': 'https://geizhals.de',
+    'wikidata_id': 'Q15977657',
+    'use_official_api': False,
+    'official_api_documentation': None,
+    'require_api_key': False,
+    'results': 'HTML',
+    'language': 'de',
+}
+paging = True
+categories = ['shopping']
+
+base_url = "https://geizhals.de"
+sort_order = 'relevance'
+
+SORT_RE = re.compile(r"sort:(\w+)")
+sort_order_map = {
+    'relevance': None,
+    'price': 'p',
+    'asc': 'p',
+    'desc': '-p',
+}
+
+
+def request(query, params):
+    sort = None
+
+    sort_order_path = SORT_RE.search(query)
+    if sort_order_path:
+        sort = sort_order_map.get(sort_order_path.group(1))
+        query = SORT_RE.sub("", query)
+        logger.debug(query)
+
+    args = {
+        'fs': query,
+        'pg': params['pageno'],
+        'toggle_all': 1,  # load item specs
+        'sort': sort,
+    }
+    params['url'] = f"{base_url}/?{urlencode(args)}"
+
+    return params
+
+
+def response(resp):
+    results = []
+
+    dom = html.fromstring(resp.text)
+    for result in eval_xpath_list(dom, "//article[contains(@class, 'listview__item')]"):
+        content = []
+        for spec in eval_xpath_list(result, ".//div[contains(@class, 'specs-grid__item')]"):
+            content.append(f"{extract_text(eval_xpath(spec, './dt'))}: {extract_text(eval_xpath(spec, './dd'))}")
+
+        metadata = [
+            extract_text(eval_xpath(result, ".//div[contains(@class, 'stars-rating-label')]")),
+            extract_text(eval_xpath(result, ".//div[contains(@class, 'listview__offercount')]")),
+        ]
+
+        item = {
+            'template': 'products.html',
+            'url': (
+                base_url + "/" + extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__name-link')]/@href"))
+            ),
+            'title': extract_text(eval_xpath(result, ".//h3[contains(@class, 'listview__name')]")),
+            'content': ' | '.join(content),
+            'thumbnail': extract_text(eval_xpath(result, ".//img[contains(@class, 'listview__image')]/@src")),
+            'price': "Bestes Angebot: "
+            + extract_text(eval_xpath(result, ".//a[contains(@class, 'listview__price-link')]")).split(" ")[1]
+            + "€",
+            'metadata': ', '.join(item for item in metadata if item),
+        }
+
+        results.append(item)
+
+    return results

+ 5 - 0
searx/settings.yml

@@ -797,6 +797,11 @@ engines:
     timeout: 8.0
     timeout: 8.0
     disabled: true
     disabled: true
 
 
+  - name: geizhals
+    engine: geizhals
+    shortcut: geiz
+    disabled: true
+
   - name: genius
   - name: genius
     engine: genius
     engine: genius
     shortcut: gen
     shortcut: gen