update_firefox_version.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python
  2. # lint: pylint
  3. # SPDX-License-Identifier: AGPL-3.0-or-later
  4. """Fetch firefox useragent signatures
  5. Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
  6. <.github/workflows/data-update.yml>`).
  7. """
  8. # pylint: disable=use-dict-literal
  9. import json
  10. import re
  11. from os.path import join
  12. from urllib.parse import urlparse, urljoin
  13. from distutils.version import LooseVersion # pylint: disable=deprecated-module
  14. import requests
  15. from lxml import html
  16. from searx import searx_dir
  17. URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
  18. RELEASE_PATH = '/pub/firefox/releases/'
  19. NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
  20. # BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
  21. # ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
  22. #
  23. useragents = {
  24. # fmt: off
  25. "versions": (),
  26. "os": ('Windows NT 10.0; Win64; x64',
  27. 'X11; Linux x86_64'),
  28. "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
  29. # fmt: on
  30. }
  31. def fetch_firefox_versions():
  32. resp = requests.get(URL, timeout=2.0)
  33. if resp.status_code != 200:
  34. # pylint: disable=broad-exception-raised
  35. raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
  36. dom = html.fromstring(resp.text)
  37. versions = []
  38. for link in dom.xpath('//a/@href'):
  39. url = urlparse(urljoin(URL, link))
  40. path = url.path
  41. if path.startswith(RELEASE_PATH):
  42. version = path[len(RELEASE_PATH) : -1]
  43. if NORMAL_REGEX.match(version):
  44. versions.append(LooseVersion(version))
  45. list.sort(versions, reverse=True)
  46. return versions
  47. def fetch_firefox_last_versions():
  48. versions = fetch_firefox_versions()
  49. result = []
  50. major_last = versions[0].version[0]
  51. major_list = (major_last, major_last - 1)
  52. for version in versions:
  53. major_current = version.version[0]
  54. minor_current = version.version[1]
  55. if major_current in major_list:
  56. user_agent_version = f'{major_current}.{minor_current}'
  57. if user_agent_version not in result:
  58. result.append(user_agent_version)
  59. return result
  60. def get_useragents_filename():
  61. return join(join(searx_dir, "data"), "useragents.json")
  62. if __name__ == '__main__':
  63. useragents["versions"] = fetch_firefox_last_versions()
  64. with open(get_useragents_filename(), "w", encoding='utf-8') as f:
  65. json.dump(useragents, f, indent=4, ensure_ascii=False)