Browse Source

[mod] data: implement a simple tracker URL (SQL) database

On demand, the tracker data is loaded directly into the cache, so that the
maintenance of this data via PRs is no longer necessary.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 month ago
parent
commit
2dd4f7b972

+ 0 - 1
.github/workflows/data-update.yml

@@ -33,7 +33,6 @@ jobs:
           - update_engine_traits.py
           - update_wikidata_units.py
           - update_engine_descriptions.py
-          - update_tracker_patterns.py
 
     permissions:
       contents: write

+ 15 - 0
searx/cache.py

@@ -10,6 +10,7 @@ from __future__ import annotations
 __all__ = ["ExpireCacheCfg", "ExpireCacheStats", "ExpireCache", "ExpireCacheSQLite"]
 
 import abc
+from collections.abc import Iterator
 import dataclasses
 import datetime
 import hashlib
@@ -396,6 +397,20 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
 
         return self.deserialize(row[0])
 
+    def pairs(self, ctx: str | None = None) -> Iterator[tuple[str, typing.Any]]:
+        """Iterate over key/value pairs from table given by argument ``ctx``.
+        If ``ctx`` argument is ``None`` (the default), a table name is
+        generated from the :py:obj:`ExpireCacheCfg.name`.  Nothing is yielded
+        if the table does not exist."""
+        self.maintenance()
+
+        table = ctx or self.normalize_name(self.cfg.name)
+
+        # The name is interpolated into SQL only after matching a known table.
+        if table in self.table_names:
+            for row in self.DB.execute(f"SELECT key, value FROM {table}"):
+                yield row[0], self.deserialize(row[1])
+
     def state(self) -> ExpireCacheStats:
         cached_items = {}
         for table in self.table_names:

+ 3 - 3
searx/data/__init__.py

@@ -13,6 +13,7 @@ import typing
 
 from .core import log, data_dir
 from .currencies import CurrenciesDB
+from .tracker_patterns import TrackerPatternsDB
 
 CURRENCIES: CurrenciesDB
 USER_AGENTS: dict[str, typing.Any]
@@ -23,7 +24,7 @@ OSM_KEYS_TAGS: dict[str, typing.Any]
 ENGINE_DESCRIPTIONS: dict[str, typing.Any]
 ENGINE_TRAITS: dict[str, typing.Any]
 LOCALES: dict[str, typing.Any]
-TRACKER_PATTERNS: list[dict[str, typing.Any]]
+TRACKER_PATTERNS: TrackerPatternsDB
 
 lazy_globals = {
     "CURRENCIES": CurrenciesDB(),
@@ -35,7 +36,7 @@ lazy_globals = {
     "ENGINE_DESCRIPTIONS": None,
     "ENGINE_TRAITS": None,
     "LOCALES": None,
-    "TRACKER_PATTERNS": None,
+    "TRACKER_PATTERNS": TrackerPatternsDB(),
 }
 
 data_json_files = {
@@ -47,7 +48,6 @@ data_json_files = {
     "ENGINE_DESCRIPTIONS": "engine_descriptions.json",
     "ENGINE_TRAITS": "engine_traits.json",
     "LOCALES": "locales.json",
-    "TRACKER_PATTERNS": "tracker_patterns.json",
 }
 
 

+ 0 - 1985
searx/data/tracker_patterns.json

@@ -1,1985 +0,0 @@
-[
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/gp\\/.*?(?:redirector.html|cart\\/ajax-update.html|video\\/api\\/)",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/(?:hz\\/reviews-render\\/ajax\\/|message-us\\?|s\\?)"
-        ],
-        "trackerParams": [
-            "p[fd]_rd_[a-z]*",
-            "qid",
-            "srs?",
-            "__mk_[a-z]{1,3}_[a-z]{1,3}",
-            "spIA",
-            "ms3_c",
-            "[a-z%0-9]*ie",
-            "refRID",
-            "colii?d",
-            "[^a-z%0-9]adId",
-            "qualifier",
-            "_encoding",
-            "smid",
-            "field-lbr_brands_browse-bin",
-            "ref_?",
-            "th",
-            "sprefix",
-            "crid",
-            "keywords",
-            "cv_ct_[a-z]+",
-            "linkCode",
-            "creativeASIN",
-            "ascsubtag",
-            "aaxitk",
-            "hsa_cr_id",
-            "sb-ci-[a-z]+",
-            "rnid",
-            "dchild",
-            "camp",
-            "creative",
-            "s",
-            "content-id",
-            "dib",
-            "dib_tag",
-            "social_share",
-            "starsLeft",
-            "skipTwisterOG"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "cvid",
-            "ocid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?msn\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "p[fd]_rd_[a-z]*",
-            "qid",
-            "srs?",
-            "__mk_[a-z]{1,3}_[a-z]{1,3}",
-            "spIA",
-            "ms3_c",
-            "[a-z%0-9]*ie",
-            "refRID",
-            "colii?d",
-            "[^a-z%0-9]adId",
-            "qualifier",
-            "_encoding",
-            "smid",
-            "field-lbr_brands_browse-bin",
-            "ref_?",
-            "th",
-            "sprefix",
-            "crid",
-            "cv_ct_[a-z]+",
-            "linkCode",
-            "creativeASIN",
-            "ascsubtag",
-            "aaxitk",
-            "hsa_cr_id",
-            "sb-ci-[a-z]+",
-            "rnid",
-            "dchild",
-            "camp",
-            "creative"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/s\\?"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?fls-na\\.amazon(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/mail\\.google\\.com\\/mail\\/u\\/",
-            "^https?:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/",
-            "^https?:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/",
-            "^https?:\\/\\/(?:docs|accounts)\\.google(?:\\.[a-z]{2,}){1,}",
-            "^https?:\\/\\/([a-z0-9-\\.])*(chat|drive)\\.google\\.com\\/videoplayback",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}(?:\\/upload)?\\/drive\\/",
-            "^https?:\\/\\/news\\.google\\.com.*\\?hl=.",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/s\\?tbm=map.*?gs_[a-z]*=.",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/(?:complete\\/search|setprefs|searchbyimage)",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/(?:appsactivity|aclk\\?)",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}\\/safe[-]?browsing\\/([^&]+)"
-        ],
-        "trackerParams": [
-            "ved",
-            "bi[a-z]*",
-            "gfe_[a-z]*",
-            "ei",
-            "source",
-            "gs_[a-z]*",
-            "site",
-            "oq",
-            "esrc",
-            "uact",
-            "cd",
-            "cad",
-            "gws_[a-z]*",
-            "atyp",
-            "vet",
-            "_u",
-            "je",
-            "dcr",
-            "ie",
-            "sei",
-            "sa",
-            "dpr",
-            "btn[a-z]*",
-            "usg",
-            "cd",
-            "cad",
-            "uact",
-            "aqs",
-            "sourceid",
-            "sxsrf",
-            "rlz",
-            "i-would-rather-use-firefox",
-            "pcampaignid",
-            "sca_(?:esv|upv)",
-            "iflsig",
-            "fbs",
-            "ictx"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?google(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?googlesyndication\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?doubleclick(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?googleadservices\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?matrix\\.org\\/_matrix\\/",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?(?:cloudflare\\.com|prismic\\.io|tangerine\\.ca|gitlab\\.com)",
-            "^https?:\\/\\/myaccount.google(?:\\.[a-z]{2,}){1,}",
-            "^https?:\\/\\/accounts.google(?:\\.[a-z]{2,}){1,}",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gcsip\\.(?:com|nl)[^?]*\\?.*?&?ref_?=.",
-            "^https?:\\/\\/[^/]+/[^/]+/[^/]+\\/-\\/refs\\/switch[^?]*\\?.*?&?ref_?=.",
-            "^https?:\\/\\/bugtracker\\.[^/]*\\/[^?]+\\?.*?&?ref_?=[^/?&]*",
-            "^https?:\\/\\/comment-cdn\\.9gag\\.com\\/.*?comment-list.json\\?",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?battle\\.net\\/login",
-            "^https?:\\/\\/blizzard\\.com\\/oauth2",
-            "^https?:\\/\\/kreditkarten-banking\\.lbb\\.de",
-            "^https?:\\/\\/www\\.tinkoff\\.ru",
-            "^https?:\\/\\/www\\.cyberport\\.de\\/adscript\\.php",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tweakers\\.net\\/ext\\/lt\\.dsp\\?.*?(?:%3F)?&?ref_?=.",
-            "^https?:\\/\\/git(lab)?\\.[^/]*\\/[^?]+\\?.*?&?ref_?=[^/?&]*",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon(?:\\.[a-z]{2,}){1,}\\/message-us\\?",
-            "^https?:\\/\\/authorization\\.td\\.com",
-            "^https?:\\/\\/support\\.steampowered\\.com",
-            "^https?:\\/\\/privacy\\.vakmedianet\\.nl\\/.*?ref=",
-            "^https?:\\/\\/sso\\.serverplan\\.com\\/manage2fa\\/check\\?ref=",
-            "^https?:\\/\\/login\\.meijer\\.com\\/.*?\\?ref=",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/(?:login_alerts|ajax|should_add_browser)/",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/groups\\/member_bio\\/bio_dialog\\/",
-            "^https?:\\/\\/api\\.taiga\\.io",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gog\\.com\\/click\\.html",
-            "^https?:\\/\\/login\\.progressive\\.com",
-            "^https?:\\/\\/www\\.sephora\\.com\\/api\\/",
-            "^https?:\\/\\/www\\.contestgirl\\.com",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?agenciatributaria\\.gob\\.es",
-            "^https?:\\/\\/login\\.ingbank\\.pl",
-            "^wss?:\\/\\/(?:[a-z0-9-]+\\.)*?zoom\\.us",
-            "^https?:\\/\\/api\\.bilibili\\.com",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?onet\\.pl\\/[^?]*\\?.*?utm_campaign=.",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?stripe\\.com\\/[^?]+.*?&?referrer=[^/?&]*",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?lichess\\.org\\/login.*?&?referrer=.*?",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?microsoft\\.com\\/.*?research\\/redirect",
-            "^https?:\\/\\/like.co\\/api\\/like\\/likebutton\\/[^?]+.*?&?referrer=[^/?&]*",
-            "^https?:\\/\\/button.like.co\\/in\\/.*?&?referrer=[^/?&]*",
-            "^https?:\\/\\/www\\.mma\\.go\\.kr",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?github\\.com",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?billiger\\.de\\/.*?mc=",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?\\.youtrack\\.cloud",
-            "^https?:\\/\\/cu\\.bankid\\.com",
-            "^https?:\\/\\/login\\.aliexpress\\.us"
-        ],
-        "trackerParams": [
-            "(?:%3F)?utm(?:_[a-z_]*)?",
-            "(?:%3F)?ga_[a-z_]+",
-            "(?:%3F)?yclid",
-            "(?:%3F)?_openstat",
-            "(?:%3F)?fb_action_(?:types|ids)",
-            "(?:%3F)?fb_(?:source|ref)",
-            "(?:%3F)?fbclid",
-            "(?:%3F)?action_(?:object|type|ref)_map",
-            "(?:%3F)?gs_l",
-            "(?:%3F)?mkt_tok",
-            "(?:%3F)?hmb_(?:campaign|medium|source)",
-            "(?:%3F)?gclid",
-            "(?:%3F)?srsltid",
-            "(?:%3F)?otm_[a-z_]*",
-            "(?:%3F)?cmpid",
-            "(?:%3F)?os_ehash",
-            "(?:%3F)?_ga",
-            "(?:%3F)?_gl",
-            "(?:%3F)?__twitter_impression",
-            "(?:%3F)?wt_?z?mc",
-            "(?:%3F)?wtrid",
-            "(?:%3F)?[a-z]?mc",
-            "(?:%3F)?dclid",
-            "Echobox",
-            "(?:%3F)?spm",
-            "(?:%3F)?vn(?:_[a-z]*)+",
-            "(?:%3F)?tracking_source",
-            "(?:%3F)?ceneo_spo",
-            "(?:%3F)?itm_(?:campaign|medium|source)",
-            "(?:%3F)?__hsfp",
-            "(?:%3F)?__hssc",
-            "(?:%3F)?__hstc",
-            "(?:%3F)?_hsenc",
-            "(?:%3F)?__s",
-            "(?:%3F)?hsCtaTracking",
-            "(?:%3F)?mc_(?:eid|cid|tc)",
-            "(?:%3F)?ml_subscriber",
-            "(?:%3F)?ml_subscriber_hash",
-            "(?:%3F)?msclkid",
-            "(?:%3F)?oly_anon_id",
-            "(?:%3F)?oly_enc_id",
-            "(?:%3F)?rb_clickid",
-            "(?:%3F)?s_cid",
-            "(?:%3F)?vero_conv",
-            "(?:%3F)?vero_id",
-            "(?:%3F)?wickedid",
-            "(?:%3F)?twclid"
-        ],
-        "urlPattern": ".*"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?adtech(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bf-ad(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon-adsystem(?:\\.[a-z]{2,}){1,}\\/v3\\/oor\\?"
-        ],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?amazon-adsystem(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?adsensecustomsearchads(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youtube\\.com\\/signin\\?.*?"
-        ],
-        "trackerParams": [
-            "feature",
-            "gclid",
-            "kw",
-            "si",
-            "pp"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?(youtube\\.com|youtu\\.be)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youtube\\.com\\/pagead"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youtube\\.com\\/api\\/stats\\/ads"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/.*?(plugins|ajax)\\/",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/dialog\\/(?:share|send)",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/groups\\/member_bio\\/bio_dialog\\/",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/photo\\.php\\?",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/privacy\\/specific_audience_selector_dialog\\/",
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com\\/photo\\/download\\/"
-        ],
-        "trackerParams": [
-            "hc_[a-z_%\\[\\]0-9]*",
-            "[a-z]*ref[a-z]*",
-            "__tn__",
-            "eid",
-            "__(?:xts|cft)__(?:\\[|%5B)\\d(?:\\]|%5D)",
-            "comment_tracking",
-            "dti",
-            "app",
-            "video_source",
-            "ftentidentifier",
-            "pageid",
-            "padding",
-            "ls_ref",
-            "action_history",
-            "tracking",
-            "referral_code",
-            "referral_story_type",
-            "eav",
-            "sfnsn",
-            "idorvanity",
-            "wtsid",
-            "rdc",
-            "rdr",
-            "paipv",
-            "_nc_x",
-            "_rdr",
-            "mibextid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?facebook\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/twitter.com\\/i\\/redirect"
-        ],
-        "trackerParams": [
-            "(?:ref_?)?src",
-            "s",
-            "cn",
-            "ref_url",
-            "t"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?twitter.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/x.com\\/i\\/redirect"
-        ],
-        "trackerParams": [
-            "(?:ref_?)?src",
-            "s",
-            "cn",
-            "ref_url",
-            "t"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?x.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "%24deep_link",
-            "\\$deep_link",
-            "correlation_id",
-            "ref_campaign",
-            "ref_source",
-            "%243p",
-            "rdt",
-            "\\$3p",
-            "%24original_url",
-            "\\$original_url",
-            "_branch_match_id",
-            "share_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?reddit.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "trackId",
-            "tctx",
-            "jb[a-z]*?"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?netflix.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ncid",
-            "sr",
-            "sr_share",
-            "guccounter",
-            "guce_referrer",
-            "guce_referrer_sig"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?techcrunch\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bing(?:\\.[a-z]{2,}){1,}\\/WS\\/redirect\\/"
-        ],
-        "trackerParams": [
-            "cvid",
-            "form",
-            "sk",
-            "sp",
-            "sc",
-            "qs",
-            "qp"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bing(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "nb",
-            "u"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tweakers\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "tt_medium",
-            "tt_content"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?twitch\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "pk_campaign",
-            "pk_kwd"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?vivaldi\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?indeed\\.com\\/rc\\/clk"
-        ],
-        "trackerParams": [
-            "from",
-            "alid",
-            "[a-z]*tk"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?indeed\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "vss",
-            "t",
-            "swnt",
-            "grpos",
-            "ptl",
-            "stl",
-            "exp",
-            "plim"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?hh\\.ru"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "_trkparms",
-            "_trksid",
-            "_from",
-            "hash"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?ebay(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ftag"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cnet\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ref_",
-            "pf_rd_[a-z]*"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?imdb\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?govdelivery\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "u1",
-            "ath[a-z]*"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?walmart\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "pl"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?net\\-parade\\.it"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "xid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?prvnizpravy\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "tpa"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?youku\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "smid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?nytimes\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "wbdcd"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tchibo\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "snr"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?steampowered\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?steamcommunity\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "https?:\\/\\/outgoing\\.prod\\.mozaws\\.net\\/"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "src"
-        ],
-        "urlPattern": "https?:\\/\\/([a-z0-9-.]*\\.)shutterstock\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozilla.org\\/api"
-        ],
-        "trackerParams": [
-            "src",
-            "platform",
-            "redirect_source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozilla\\.org"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ref"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?readdc\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "email"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?dailycodingproblem\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "email_token",
-            "email_source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?github\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?deviantart\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?site2\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?site\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?site3\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ws_ab_test",
-            "btsid",
-            "algo_expid",
-            "algo_pvid",
-            "gps-id",
-            "scm[_a-z-]*",
-            "cv",
-            "af",
-            "mall_affr",
-            "sk",
-            "dp",
-            "terminal_id",
-            "aff_request_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?aliexpress(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozillazine\\.org"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/comment-cdn\\.9gag\\.com\\/.*?comment-list.json\\?"
-        ],
-        "trackerParams": [
-            "ref"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?9gag\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?linksynergy\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ref"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?giphy\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gate\\.sc"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/vk\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ref_?"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?woot\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "_requestid",
-            "cid",
-            "dl",
-            "di",
-            "sd",
-            "bi"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?vitamix\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?curseforge\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?messenger\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "__twitter_impression"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?nypost\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "partner"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?ozon\\.ru"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "link_id",
-            "can_id",
-            "source",
-            "email_referrer",
-            "email_subject"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?norml\\.org"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "refId",
-            "trk",
-            "li[a-z]{2}",
-            "trackingId"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?linkedin\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "u"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?linkedin\\.com\\/learning"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?smartredirect\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "b"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?spiegel\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?rutracker\\.org"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "igshid",
-            "igsh"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?instagram\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?imgsrc\\.ru"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "h"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?boredpanda\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?awstrack\\.me"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?exactag\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "dbkanal_[0-9]{3}"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bahn\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "cuid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?disq\\.us"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?anonym\\.to"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "cm_lm",
-            "cm_mmc",
-            "webUserId",
-            "spMailingID",
-            "spUserID",
-            "spJobID",
-            "spReportId"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?moosejaw\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "int_campaign"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?80000hours\\.org"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "si"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?spotify\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "lr",
-            "redircnt"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?(?:yandex(?:\\.[a-z]{2,}){1,}|ya\\.ru)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ecp",
-            "m_bt"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?healio\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "iref"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?zoho\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sc_referrer",
-            "sc_ua"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?snapchat\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?medium\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?swp\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "from"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?wps\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "entrypoint",
-            "form_type"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:accounts\\.)?firefox\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "as"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:support\\.)?mozilla\\.org"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "test"
-        ],
-        "urlPattern": "^https?:\\/\\/kevinroebert\\.gitlab\\.io\\/ClearUrls\\/void\\/index\\.html"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/kevinroebert\\.gitlab\\.io\\/ClearUrls\\/void\\/block\\.svg"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "test"
-        ],
-        "urlPattern": "^https?:\\/\\/test\\.clearurls\\.xyz\\/void\\/index\\.html"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/test\\.clearurls\\.xyz\\/void\\/block\\.svg"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "from",
-            "xtor",
-            "xt_at"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?diepresse\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "x"
-        ],
-        "urlPattern": "^https?:\\/\\/newsletter\\.lidl(?:\\.[a-z]{2,}){1,}"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "reco_id",
-            "sid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?allegro\\.pl"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "CMP_SKU",
-            "MER",
-            "mr:trackingCode",
-            "mr:device",
-            "mr:adType",
-            "iv_",
-            "CMP_ID",
-            "k_clickid",
-            "rmatt",
-            "INT_ID",
-            "ti",
-            "fl"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?backcountry\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "rv",
-            "_xtd"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?meetup\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "app",
-            "ign-itsc[a-z]+"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?apple\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?alabout\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "source",
-            "bxid",
-            "cndid",
-            "esrc",
-            "mbid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?newyorker\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "track_click",
-            "link_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gog\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tradedoubler\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "CMP"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?theguardian\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?srvtrck\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/mysku\\.ru"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?admitad\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "price",
-            "sourceType",
-            "suid",
-            "ut_sk",
-            "un",
-            "share_crt_v",
-            "sp_tk",
-            "cpp",
-            "shareurl",
-            "short_name",
-            "app",
-            "scm[_a-z-]*",
-            "pvid",
-            "algo_expid",
-            "algo_pvid",
-            "ns",
-            "abbucket",
-            "ali_refid",
-            "ali_trackid",
-            "acm",
-            "utparam",
-            "pos",
-            "abtest",
-            "trackInfo",
-            "utkn",
-            "scene",
-            "mytmenu",
-            "turing_bucket",
-            "lygClk",
-            "impid",
-            "bftTag",
-            "bftRwd",
-            "spm",
-            "_u"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?taobao\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "price",
-            "sourceType",
-            "suid",
-            "ut_sk",
-            "un",
-            "share_crt_v",
-            "sp_tk",
-            "cpp",
-            "shareurl",
-            "short_name",
-            "app",
-            "scm[_a-z-]*",
-            "pvid",
-            "algo_expid",
-            "algo_pvid",
-            "ns",
-            "abbucket",
-            "ali_refid",
-            "ali_trackid",
-            "acm",
-            "utparam",
-            "pos",
-            "abtest",
-            "trackInfo",
-            "user_number_id",
-            "utkn",
-            "scene",
-            "mytmenu",
-            "turing_bucket",
-            "lygClk",
-            "impid",
-            "bftTag",
-            "bftRwd",
-            "activity_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tmall\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sm"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tb\\.cn"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/api\\.bilibili\\.com",
-            "^https?:\\/\\/space\\.bilibili\\.com"
-        ],
-        "trackerParams": [
-            "callback",
-            "spm_id_from",
-            "from_source",
-            "from",
-            "seid",
-            "mid",
-            "share_source",
-            "msource",
-            "refer_from",
-            "share_from",
-            "share_medium",
-            "share_source",
-            "share_plat",
-            "share_tag",
-            "share_session_id",
-            "timestamp",
-            "unique_k",
-            "vd_source",
-            "plat_id",
-            "buvid",
-            "is_story_h5",
-            "up_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bilibili\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "spm_id_from"
-        ],
-        "urlPattern": "^https?:\\/\\/space\\.bilibili\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "bbid",
-            "ts"
-        ],
-        "urlPattern": "^https?:\\/\\/m\\.bilibili\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "visit_id",
-            "session_id",
-            "broadcast_type",
-            "is_room_feed"
-        ],
-        "urlPattern": "^https?:\\/\\/live\\.bilibili\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?marketscreener\\.com\\/search\\/\\?"
-        ],
-        "trackerParams": [
-            "type_recherche",
-            "mots",
-            "noredirect",
-            "RewriteLast",
-            "lien",
-            "aComposeInputSearch",
-            "type_recherche_forum",
-            "add_mots",
-            "countview"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?marketscreener\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "type_recherche",
-            "noredirect",
-            "RewriteLast",
-            "lien",
-            "aComposeInputSearch",
-            "type_recherche_forum",
-            "countview"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?marketscreener\\.com\\/search\\/\\?"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "irclickid",
-            "irgwc",
-            "loc",
-            "acampID",
-            "mpid",
-            "intl"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bestbuy\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?digidip\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "u_code",
-            "preview_pb",
-            "_d",
-            "_t",
-            "_r",
-            "timestamp",
-            "user_id",
-            "share_app_name",
-            "share_iid",
-            "source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tiktok\\.com"
-    },
-    {
-        "exceptions": [
-            "^https?:\\/\\/edith\\.xiaohongshu\\.com\\/api\\/sns\\/web\\/v1\\/user\\/hover_card"
-        ],
-        "trackerParams": [
-            "xhsshare",
-            "author_share",
-            "type",
-            "xsec_source",
-            "share_from_user_hidden",
-            "app_version",
-            "ignoreEngage",
-            "app_platform",
-            "apptime",
-            "appuid",
-            "shareRedId",
-            "share_id",
-            "exSource",
-            "verifyUuid",
-            "verifyType",
-            "verifyBiz"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?xiaohongshu\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "idprob",
-            "hash",
-            "sending_id",
-            "site_id",
-            "dr_tracker"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?autoplus\\.fr"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "pc",
-            "npc",
-            "npv[0-9]+",
-            "npi"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bigfishgames\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?dpbolvw\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?humblebundle\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "actId",
-            "actCampaignType",
-            "actSource"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cafepedagogique\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "tl_[a-z_]+"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bloculus\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mailpanion\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?signtr\\.website"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mailtrack\\.io"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "rtoken"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?zillow\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ex",
-            "identityID",
-            "MID",
-            "RID"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?realtor\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "riftinfo"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?redfin\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "epic_affiliate",
-            "epic_gameId"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?epicgames\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "srcc",
-            "utm_v",
-            "utm_medium",
-            "utm_source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?onet\\.pl"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "internalSource",
-            "referringId",
-            "referringContentType",
-            "clickId"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?allrecipes\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "xtor"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?europe1\\.fr"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?effiliation\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "istCompanyId",
-            "istFeedId",
-            "istItemId",
-            "istBid",
-            "clickOrigin"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?argos\\.co\\.uk"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?hlserve\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "src"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?thunderbird\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "__source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cnbc\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "refPageId"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?roblox\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "_returnURL"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?cell\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "redirectedFrom"
-        ],
-        "urlPattern": "^https?:\\/\\/academic\\.oup\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?flexlinkspro\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "source"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?agata88\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "share"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?hs\\.fi"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "origin"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?yle\\.fi"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/refer\\.ccbill\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "otracker.?",
-            "ssid",
-            "[cilp]id",
-            "marketplace",
-            "store",
-            "srno",
-            "store",
-            "ppn",
-            "ppt",
-            "fm",
-            "collection-tab-name",
-            "sattr\\[\\]",
-            "p\\[\\]",
-            "st"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?flipkart\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sid",
-            "src",
-            "siteId",
-            "lcb",
-            "leadOutUrl",
-            "offerListId",
-            "osId",
-            "cancelUrl",
-            "disc"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?idealo\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?idealo-partner\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "internal"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?teletrader\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?webgains\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ecid",
-            "_hsmi",
-            "_hsenc"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?deeplearning\\.ai"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?getpocket\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "PostType",
-            "ServiceType",
-            "ftag",
-            "UniqueID",
-            "TheTime"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?gamespot\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "src",
-            "trkid",
-            "whid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?tokopedia\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ddw",
-            "ds_ch"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?wkorea\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "source",
-            "medium",
-            "content"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?eonline\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "taid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?reuters\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/app\\.adjust\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "source_location",
-            "psf_variant",
-            "share_intent"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?change\\.org"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "tag"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?ceneo\\.pl"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "intcid"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?wired\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "pid",
-            "uid",
-            "tag",
-            "release",
-            "environment",
-            "sample",
-            "behavior",
-            "enableSPA",
-            "enableLinkTrace",
-            "page",
-            "begin",
-            "c2",
-            "c3",
-            "success",
-            "code",
-            "msg",
-            "api",
-            "traceId",
-            "pv_id",
-            "flag",
-            "sr",
-            "vp",
-            "ct",
-            "_v",
-            "sampling",
-            "dl",
-            "post_res"
-        ],
-        "urlPattern": "^https?:\\/\\/arms-retcode\\.aliyuncs\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "adid",
-            "i_cid",
-            "n_cid",
-            "waad"
-        ],
-        "urlPattern": "^https?://(?:[a-z0-9-]+\\.)*?nikkei\\.co(?:m|\\.jp)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "weibo_id",
-            "dt_dapp"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?weibo\\.(cn|com)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "context_referrer",
-            "source",
-            "ref_ctx_id",
-            "funnel"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?fiverr\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "click_key",
-            "click_sum",
-            "organic_search_click"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?etsy\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "itm_campaign",
-            "itm_medium",
-            "itm_source",
-            "itm_term"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?magento\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id",
-            "source",
-            "seq_no"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?novinky\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?aktualne\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id",
-            "source",
-            "seq_no"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?seznamzpravy\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "log",
-            "p"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?billiger\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sznclid",
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?respekt\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sznclid",
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?faei\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sznclid",
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?iprima\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sznclid",
-            "dop_ab_variant",
-            "dop_source_zone_name",
-            "dop_req_id",
-            "dop_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?nova\\.cz"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?duckduckgo\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "DEAL_ID",
-            "L",
-            "S",
-            "T",
-            "V",
-            "pdp_filters",
-            "position",
-            "search_layout",
-            "tracking_id",
-            "type",
-            "c_[_a-zA-Z]+",
-            "me\\.[_a-zA-Z]+",
-            "reco_[_a-zA-Z]+"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mercadolibre\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "funnelUUID"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?quizlet\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "xtor",
-            "at_[a-z_]+"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?bbc\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "federated_search_id",
-            "search_type",
-            "source",
-            "source_impression_id"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?airbnb\\.(com|ae|ca|co\\.in|co\\.nz|co\\.uk|co\\.za|com\\.au|com\\.mt|com\\.sg|de|gy|ie)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?partner-ads\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "refer_method"
-        ],
-        "urlPattern": "^https?://(?:[a-z0-9-]+\\.)*?kahoot\\.it"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?href\\.li"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?adform\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?artefact\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?awin1\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?telekom\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?loginfra\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?umblr\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "from_search",
-            "from_srp",
-            "qid",
-            "rank",
-            "ac"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?goodreads\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "pvid",
-            "scm"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?sohu\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "publish_id",
-            "sp_atk",
-            "xptdk"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?shopee\\.(com|co\\.th)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "clickTrackInfo",
-            "abid",
-            "pvid",
-            "ad_src",
-            "spm",
-            "src",
-            "from",
-            "scm",
-            "pa",
-            "pid_pvid",
-            "did",
-            "mp",
-            "cid",
-            "impsrc",
-            "pos"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?lazada\\.(com|co\\.th|co\\.id|com\\.my|com\\.ph|sg|vn)"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?pantip\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?skimresources\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sPartner",
-            "campaign"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?office-partner\\.de"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?mozgcp\\.net"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "shareToken"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?thetimes\\.co\\.uk"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "ito"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?metro\\.co\\.uk"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [
-            "sh"
-        ],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?forbes\\.com"
-    },
-    {
-        "exceptions": [],
-        "trackerParams": [],
-        "urlPattern": "^https?:\\/\\/(?:[a-z0-9-]+\\.)*?viglink\\.com"
-    }
-]

+ 142 - 0
searx/data/tracker_patterns.py

@@ -0,0 +1,142 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Simple implementation to store TrackerPatterns data in a SQL database."""
+
+from __future__ import annotations
+import typing
+
+__all__ = ["TrackerPatternsDB"]
+
+import re
+import pathlib
+from collections.abc import Iterator
+from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
+
+import httpx
+
+from searx.data.core import get_cache, log
+
+# A rule is the tuple (url_regexp, url_ignore, del_args); the tuple positions
+# are named by TrackerPatternsDB.Fields.
+RuleType = tuple[str, list[str], list[str]]
+
+
+class TrackerPatternsDB:
+    # pylint: disable=missing-class-docstring
+
+    ctx_name = "data_tracker_patterns"
+    json_file = pathlib.Path(__file__).parent / "tracker_patterns.json"
+
+    CLEAR_LIST_URL = [
+        # ClearURL rule lists, the first one that responds HTTP 200 is used
+        "https://rules1.clearurls.xyz/data.minify.json",
+        "https://rules2.clearurls.xyz/data.minify.json",
+        "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json",
+    ]
+
+    class Fields:
+        # pylint: disable=too-few-public-methods, invalid-name
+        url_regexp: typing.Final = 0  # URL (regular expression) match condition of the link
+        url_ignore: typing.Final = 1  # URL (regular expression) to ignore
+        del_args: typing.Final = 2  # list of URL arguments (regular expression) to delete
+
+    def __init__(self):
+        self.cache = get_cache()
+
+    def init(self):
+        if self.cache.properties("tracker_patterns loaded") != "OK":
+            self.load()
+            self.cache.properties.set("tracker_patterns loaded", "OK")
+        # F I X M E:
+        #     do we need a maintenance .. rember: database is stored
+        #     in /tmp and will be rebuild during the reboot anyway
+
+    def load(self):
+        log.debug("init searx.data.TRACKER_PATTERNS")
+        for rule in self.iter_clear_list():
+            self.add(rule)
+
+    def add(self, rule: RuleType):
+        self.cache.set(
+            key=rule[self.Fields.url_regexp],
+            value=(
+                rule[self.Fields.url_ignore],
+                rule[self.Fields.del_args],
+            ),
+            ctx=self.ctx_name,
+            expire=None,
+        )
+
+    def rules(self) -> Iterator[RuleType]:
+        self.init()
+        for key, value in self.cache.pairs(ctx=self.ctx_name):
+            yield key, value[0], value[1]
+
+    def iter_clear_list(self) -> Iterator[RuleType]:
+        resp = None
+        for url in self.CLEAR_LIST_URL:
+            resp = httpx.get(url, timeout=3)
+            if resp.status_code == 200:
+                break
+            log.warning(f"TRACKER_PATTERNS: ClearURL ignore HTTP {resp.status_code} {url}")
+
+        if resp is None:
+            log.error("TRACKER_PATTERNS: failed fetching ClearURL rule lists")
+            return
+
+        for rule in resp.json()["providers"].values():
+            yield (
+                rule["urlPattern"].replace("\\\\", "\\"),  # fix javascript regex syntax
+                [exc.replace("\\\\", "\\") for exc in rule.get("exceptions", [])],
+                rule.get("rules", []),
+            )
+
+    def clean_url(self, url: str) -> bool | str:
+        """The URL arguments are normalized and cleaned of tracker parameters.
+
+        Returns bool ``True`` to use URL unchanged (``False`` to ignore URL).
+        If URL should be modified, the returned string is the new URL to use.
+        """
+
+        new_url = url
+        parsed_new_url = urlparse(url=new_url)
+
+        for rule in self.rules():
+
+            if not re.match(rule[self.Fields.url_regexp], new_url):
+                # no match / ignore pattern
+                continue
+
+            do_ignore = False
+            for pattern in rule[self.Fields.url_ignore]:
+                if re.match(pattern, new_url):
+                    do_ignore = True
+                    break
+
+            if do_ignore:
+                # pattern is in the list of exceptions / ignore pattern
+                # HINT:
+                #    we can't break the outer pattern loop since we have
+                #    overlapping urlPattern like ".*"
+                continue
+
+            # remove tracker arguments from the url-query part
+            query_args: list[tuple[str, str]] = list(parse_qsl(parsed_new_url.query))
+
+            for name, val in query_args.copy():
+                # remove URL arguments
+                for pattern in rule[self.Fields.del_args]:
+                    if re.match(pattern, name):
+                        log.debug("TRACKER_PATTERNS: %s remove tracker arg: %s='%s'", parsed_new_url.netloc, name, val)
+                        query_args.remove((name, val))
+
+            parsed_new_url = parsed_new_url._replace(query=urlencode(query_args))
+            new_url = urlunparse(parsed_new_url)
+
+        if new_url != url:
+            return new_url
+
+        return True
+
+
+if __name__ == "__main__":
+    db = TrackerPatternsDB()
+    for r in db.rules():
+        print(r)

+ 8 - 42
searx/plugins/tracker_url_remover.py

@@ -2,17 +2,15 @@
 # pylint: disable=missing-module-docstring, unused-argument
 
 from __future__ import annotations
-import typing
 
-import re
-from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
+import logging
+import typing
 
 from flask_babel import gettext
 
 from searx.data import TRACKER_PATTERNS
 
 from . import Plugin, PluginInfo
-from ._core import log
 
 if typing.TYPE_CHECKING:
     from searx.search import SearchWithPlugins
@@ -21,13 +19,16 @@ if typing.TYPE_CHECKING:
     from searx.plugins import PluginCfg
 
 
+log = logging.getLogger("searx.plugins.tracker_url_remover")
+
+
 class SXNGPlugin(Plugin):
     """Remove trackers arguments from the returned URL."""
 
     id = "tracker_url_remover"
-    log = log.getChild(id)
 
     def __init__(self, plg_cfg: "PluginCfg") -> None:
+
         super().__init__(plg_cfg)
         self.info = PluginInfo(
             id=self.id,
@@ -47,42 +48,7 @@ class SXNGPlugin(Plugin):
         If URL should be modified, the returned string is the new URL to use."""
 
         if not url_src:
-            cls.log.debug("missing a URL in field %s", field_name)
+            log.debug("missing a URL in field %s", field_name)
             return True
 
-        new_url = url_src
-        parsed_new_url = urlparse(url=new_url)
-
-        for rule in TRACKER_PATTERNS:
-
-            if not re.match(rule["urlPattern"], new_url):
-                # no match / ignore pattern
-                continue
-
-            in_exceptions = False
-            for exception in rule["exceptions"]:
-                if re.match(exception, new_url):
-                    in_exceptions = True
-                    break
-            if in_exceptions:
-                # pattern is in the list of exceptions / ignore pattern
-                # hint: we can't break the outer pattern loop since we have
-                # overlapping urlPattern like ".*"
-                continue
-
-            # remove tracker arguments from the url-query part
-            query_args: list[tuple[str, str]] = list(parse_qsl(parsed_new_url.query))
-
-            for name, val in query_args.copy():
-                for reg in rule["trackerParams"]:
-                    if re.match(reg, name):
-                        cls.log.debug("%s remove tracker arg: %s='%s'", parsed_new_url.netloc, name, val)
-                        query_args.remove((name, val))
-
-            parsed_new_url = parsed_new_url._replace(query=urlencode(query_args))
-            new_url = urlunparse(parsed_new_url)
-
-        if new_url != url_src:
-            return new_url
-
-        return True
+        return TRACKER_PATTERNS.clean_url(url=url_src)

+ 0 - 36
searxng_extra/update/update_tracker_patterns.py

@@ -1,36 +0,0 @@
-# SPDX-License-Identifier: AGPL-3.0-or-later
-"""Fetch trackers"""
-
-import json
-import httpx
-
-from searx.data import data_dir
-
-DATA_FILE = data_dir / "tracker_patterns.json"
-CLEAR_LIST_URL = "https://raw.githubusercontent.com/ClearURLs/Rules/refs/heads/master/data.min.json"
-
-
-def fetch_clear_url_filters():
-    resp = httpx.get(CLEAR_LIST_URL)
-    if resp.status_code != 200:
-        # pylint: disable=broad-exception-raised
-        raise Exception(f"Error fetching ClearURL filter lists, HTTP code {resp.status_code}")
-
-    providers = resp.json()["providers"]
-    rules = []
-    for rule in providers.values():
-        rules.append(
-            {
-                "urlPattern": rule["urlPattern"].replace("\\\\", "\\"),  # fix javascript regex syntax
-                "exceptions": [exc.replace("\\\\", "\\") for exc in rule["exceptions"]],
-                "trackerParams": rule["rules"],
-            }
-        )
-
-    return rules
-
-
-if __name__ == '__main__':
-    filter_list = fetch_clear_url_filters()
-    with DATA_FILE.open("w", encoding='utf-8') as f:
-        json.dump(filter_list, f, indent=4, sort_keys=True, ensure_ascii=False)