update_firefox_version.py 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. #!/usr/bin/env python
  2. # lint: pylint
  3. # SPDX-License-Identifier: AGPL-3.0-or-later
  4. """Fetch firefox useragent signatures
  5. Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
  6. <.github/workflows/data-update.yml>`).
  7. """
  8. # pylint: disable=use-dict-literal
  9. import json
  10. import re
  11. from os.path import join
  12. from urllib.parse import urlparse, urljoin
  13. from packaging.version import parse
  14. import requests
  15. from lxml import html
  16. from searx import searx_dir
  17. URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
  18. RELEASE_PATH = '/pub/firefox/releases/'
  19. NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
  20. # BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
  21. # ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
  22. #
  23. useragents = {
  24. # fmt: off
  25. "versions": (),
  26. "os": ('Windows NT 10.0; Win64; x64',
  27. 'X11; Linux x86_64'),
  28. "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
  29. # fmt: on
  30. }
  31. def fetch_firefox_versions():
  32. resp = requests.get(URL, timeout=2.0)
  33. if resp.status_code != 200:
  34. # pylint: disable=broad-exception-raised
  35. raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
  36. dom = html.fromstring(resp.text)
  37. versions = []
  38. for link in dom.xpath('//a/@href'):
  39. url = urlparse(urljoin(URL, link))
  40. path = url.path
  41. if path.startswith(RELEASE_PATH):
  42. version = path[len(RELEASE_PATH) : -1]
  43. if NORMAL_REGEX.match(version):
  44. versions.append(parse(version))
  45. list.sort(versions, reverse=True)
  46. return versions
  47. def fetch_firefox_last_versions():
  48. versions = fetch_firefox_versions()
  49. result = []
  50. major_last = versions[0].major
  51. major_list = (major_last, major_last - 1)
  52. for version in versions:
  53. major_current = version.major
  54. minor_current = version.minor
  55. if major_current in major_list:
  56. user_agent_version = f'{major_current}.{minor_current}'
  57. if user_agent_version not in result:
  58. result.append(user_agent_version)
  59. return result
  60. def get_useragents_filename():
  61. return join(join(searx_dir, "data"), "useragents.json")
  62. if __name__ == '__main__':
  63. useragents["versions"] = fetch_firefox_last_versions()
  64. with open(get_useragents_filename(), "w", encoding='utf-8') as f:
  65. json.dump(useragents, f, indent=4, ensure_ascii=False)