Browse Source

[mod] pypi engine: use packages.html

Alexandre Flament 1 year ago
parent
commit
d58760ef75
2 changed files with 69 additions and 17 deletions
  1. 68 0
      searx/engines/pypi.py
  2. 1 17
      searx/settings.yml

+ 68 - 0
searx/engines/pypi.py

@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""pypi.org
+
+"""
+
+from urllib.parse import urlencode
+from dateutil import parser
+
+from lxml import html
+from searx.utils import (
+    eval_xpath_getindex,
+    eval_xpath_list,
+    extract_text,
+)
+
+# about
+about = {
+    "website": "https://pypi.org",
+    "wikidata_id": "Q2984686",
+    "official_api_documentation": "https://warehouse.readthedocs.io/api-reference/index.html",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": "HTML",
+}
+
+categories = ['it', 'packages']
+
+
+# engine dependent config
+first_page_num = 1
+base_url = "https://pypi.org"
+search_url = base_url + '/search/?{query}'
+
+
+def request(query, params):
+    args = {
+        "q": query,
+        "page": params['pageno'],
+    }
+    params['url'] = search_url.format(query=urlencode(args))
+    return params
+
+
+def response(resp):
+    results = []
+    dom = html.fromstring(resp.text)
+    for entry in eval_xpath_list(dom, '/html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]'):
+        url = base_url + extract_text(eval_xpath_getindex(entry, './@href', 0))  # type: ignore
+        title = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__name"]', 0))
+        version = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__version"]', 0))
+        created_at = extract_text(
+            eval_xpath_getindex(entry, './h3/span[@class="package-snippet__created"]/time/@datetime', 0)
+        )
+        content = extract_text(eval_xpath_getindex(entry, './p', 0))
+        results.append(
+            {
+                "template": "packages.html",
+                "url": url,
+                "title": title,
+                'package_name': title,
+                "content": content,
+                "version": version,
+                'publishedDate': parser.parse(created_at),  # type: ignore
+            }
+        )
+
+    return results

+ 1 - 17
searx/settings.yml

@@ -1457,23 +1457,7 @@ engines:
 
 
   - name: pypi
   - name: pypi
     shortcut: pypi
     shortcut: pypi
-    engine: xpath
-    paging: true
-    search_url: https://pypi.org/search/?q={query}&page={pageno}
-    results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]
-    url_xpath: ./@href
-    title_xpath: ./h3/span[@class="package-snippet__name"]
-    content_xpath: ./p
-    suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"]
-    first_page_num: 1
-    categories: [it, packages]
-    about:
-      website: https://pypi.org
-      wikidata_id: Q2984686
-      official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html
-      use_official_api: false
-      require_api_key: false
-      results: HTML
+    engine: pypi
 
 
   - name: qwant
   - name: qwant
     qwant_categ: web
     qwant_categ: web