update_firefox_version.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #!/usr/bin/env python
  2. # SPDX-License-Identifier: AGPL-3.0-or-later
  3. """Fetch firefox useragent signatures
  4. Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
  5. <.github/workflows/data-update.yml>`).
  6. """
  7. import json
  8. import requests
  9. import re
  10. from os.path import dirname, join
  11. from urllib.parse import urlparse, urljoin
  12. from distutils.version import LooseVersion, StrictVersion
  13. from lxml import html
  14. from searx import searx_dir
  15. URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
  16. RELEASE_PATH = '/pub/firefox/releases/'
  17. NORMAL_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?$')
  18. # BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
  19. # ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')
  20. #
  21. useragents = {
  22. # fmt: off
  23. "versions": (),
  24. "os": ('Windows NT 10.0; Win64; x64',
  25. 'X11; Linux x86_64'),
  26. "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
  27. # fmt: on
  28. }
  29. def fetch_firefox_versions():
  30. resp = requests.get(URL, timeout=2.0)
  31. if resp.status_code != 200:
  32. raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
  33. else:
  34. dom = html.fromstring(resp.text)
  35. versions = []
  36. for link in dom.xpath('//a/@href'):
  37. url = urlparse(urljoin(URL, link))
  38. path = url.path
  39. if path.startswith(RELEASE_PATH):
  40. version = path[len(RELEASE_PATH) : -1]
  41. if NORMAL_REGEX.match(version):
  42. versions.append(LooseVersion(version))
  43. list.sort(versions, reverse=True)
  44. return versions
  45. def fetch_firefox_last_versions():
  46. versions = fetch_firefox_versions()
  47. result = []
  48. major_last = versions[0].version[0]
  49. major_list = (major_last, major_last - 1)
  50. for version in versions:
  51. major_current = version.version[0]
  52. if major_current in major_list:
  53. result.append(version.vstring)
  54. return result
  55. def get_useragents_filename():
  56. return join(join(searx_dir, "data"), "useragents.json")
  57. if __name__ == '__main__':
  58. useragents["versions"] = fetch_firefox_last_versions()
  59. with open(get_useragents_filename(), "w", encoding='utf-8') as f:
  60. json.dump(useragents, f, indent=4, ensure_ascii=False)