# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``http_user_agent``
--------------------------

The ``http_user_agent`` method evaluates a request as the request of a bot if
the User-Agent_ header is unset or matches the regular expression
:py:obj:`USER_AGENT`.

.. _User-Agent:
   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent

"""
# pylint: disable=unused-argument

from __future__ import annotations
import re
from ipaddress import (
    IPv4Network,
    IPv6Network,
)

import flask
import werkzeug

from . import config
from ._helpers import too_many_requests


# The pattern is one big alternation wrapped in a single group.  The fragments
# are joined verbatim, so every fragment after the first two carries its own
# leading ``|``.
USER_AGENT = ''.join(
    (
        r'(',
        # placeholder used by filter_request() when the header is missing
        r'unknown',
        r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp',
        r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy',
        r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot',
        r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot',
        r'|ZmEu|BLEXBot|bitlybot|HeadlessChrome',
        # unmaintained Farside instances
        r'|',
        re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)'),
        # other bots and client to block
        '|.*PetalBot.*',
        r')',
    )
)
"""Regular expression that matches to User-Agent_ from known *bots*"""

# Compiled form of USER_AGENT; filled in lazily by regexp_user_agent().
_regexp = None


def regexp_user_agent():
    """Return the compiled :py:obj:`USER_AGENT` pattern.

    The pattern is compiled on the first call and stored in the module-level
    ``_regexp``; later calls hand back that same object.
    """
    global _regexp  # pylint: disable=global-statement
    if _regexp is None:
        _regexp = re.compile(USER_AGENT)
    return _regexp


def filter_request(
    network: IPv4Network | IPv6Network,
    request: flask.Request,
    cfg: config.Config,
) -> werkzeug.Response | None:
    """Classify ``request`` as a bot request based on its User-Agent_ header.

    :param network: passed through unchanged to ``too_many_requests``.
    :param request: the request whose ``User-Agent`` header is inspected.
    :param cfg: not used by this method.
    :return: the result of ``too_many_requests`` when the header is missing or
        matches :py:obj:`USER_AGENT`, otherwise ``None``.

    The pattern is applied with ``match()``, i.e. anchored at the beginning of
    the header value.
    """
    # A missing header is replaced by 'unknown', which is itself the first
    # alternative of USER_AGENT -- so an unset header counts as a bot.
    user_agent = request.headers.get('User-Agent', 'unknown')

    if regexp_user_agent().match(user_agent) is None:
        return None

    return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")