pypi.py 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """pypi.org
  4. """
  5. from urllib.parse import urlencode
  6. from dateutil import parser
  7. from lxml import html
  8. from searx.utils import (
  9. eval_xpath_getindex,
  10. eval_xpath_list,
  11. extract_text,
  12. )
  13. # about
  14. about = {
  15. "website": "https://pypi.org",
  16. "wikidata_id": "Q2984686",
  17. "official_api_documentation": "https://warehouse.readthedocs.io/api-reference/index.html",
  18. "use_official_api": False,
  19. "require_api_key": False,
  20. "results": "HTML",
  21. }
  22. categories = ['it', 'packages']
  23. # engine dependent config
  24. first_page_num = 1
  25. base_url = "https://pypi.org"
  26. search_url = base_url + '/search/?{query}'
  27. def request(query, params):
  28. args = {
  29. "q": query,
  30. "page": params['pageno'],
  31. }
  32. params['url'] = search_url.format(query=urlencode(args))
  33. return params
  34. def response(resp):
  35. results = []
  36. dom = html.fromstring(resp.text)
  37. for entry in eval_xpath_list(dom, '/html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]'):
  38. url = base_url + extract_text(eval_xpath_getindex(entry, './@href', 0)) # type: ignore
  39. title = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__name"]', 0))
  40. version = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__version"]', 0))
  41. created_at = extract_text(
  42. eval_xpath_getindex(entry, './h3/span[@class="package-snippet__created"]/time/@datetime', 0)
  43. )
  44. content = extract_text(eval_xpath_getindex(entry, './p', 0))
  45. results.append(
  46. {
  47. "template": "packages.html",
  48. "url": url,
  49. "title": title,
  50. 'package_name': title,
  51. "content": content,
  52. "version": version,
  53. 'publishedDate': parser.parse(created_at), # type: ignore
  54. }
  55. )
  56. return results