update_firefox_version.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. #!/usr/bin/env python
  2. # lint: pylint
  3. # SPDX-License-Identifier: AGPL-3.0-or-later
  4. """Fetch firefox useragent signatures
  5. Output file: :origin:`searx/data/useragents.json` (:origin:`CI Update data ...
  6. <.github/workflows/data-update.yml>`).
  7. """
  8. import json
  9. import re
  10. from os.path import join
  11. from urllib.parse import urlparse, urljoin
  12. from distutils.version import LooseVersion
  13. import requests
  14. from lxml import html
  15. from searx import searx_dir
  16. URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
  17. RELEASE_PATH = '/pub/firefox/releases/'
  18. NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?$')
  19. # BETA_REGEX = re.compile(r'.*[0-9]b([0-9\-a-z]+)$')
  20. # ESR_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?esr$')
  21. #
  22. useragents = {
  23. # fmt: off
  24. "versions": (),
  25. "os": ('Windows NT 10.0; Win64; x64',
  26. 'X11; Linux x86_64'),
  27. "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}",
  28. # fmt: on
  29. }
  30. def fetch_firefox_versions():
  31. resp = requests.get(URL, timeout=2.0)
  32. if resp.status_code != 200:
  33. raise Exception("Error fetching firefox versions, HTTP code " + resp.status_code)
  34. dom = html.fromstring(resp.text)
  35. versions = []
  36. for link in dom.xpath('//a/@href'):
  37. url = urlparse(urljoin(URL, link))
  38. path = url.path
  39. if path.startswith(RELEASE_PATH):
  40. version = path[len(RELEASE_PATH) : -1]
  41. if NORMAL_REGEX.match(version):
  42. versions.append(LooseVersion(version))
  43. list.sort(versions, reverse=True)
  44. return versions
  45. def fetch_firefox_last_versions():
  46. versions = fetch_firefox_versions()
  47. result = []
  48. major_last = versions[0].version[0]
  49. major_list = (major_last, major_last - 1)
  50. for version in versions:
  51. major_current = version.version[0]
  52. if major_current in major_list:
  53. result.append(version.vstring)
  54. return result
  55. def get_useragents_filename():
  56. return join(join(searx_dir, "data"), "useragents.json")
  57. if __name__ == '__main__':
  58. useragents["versions"] = fetch_firefox_last_versions()
  59. with open(get_useragents_filename(), "w", encoding='utf-8') as f:
  60. json.dump(useragents, f, indent=4, ensure_ascii=False)