Files
dokuwiki-plugin-botmon/config/known-bots.json
Sascha Leib 871c97bf14 Multiple smaller fixes
Added Support for Privacy Browser (Android),
Split Meta bots for better control,
Added Internet Archive to default whitelist,
Reactivated hashed cookies.
2025-11-07 12:29:08 +01:00

343 lines
10 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
[ {"id": "bingbot",
"n": "BingBot",
"r": ["bingbot"],
"rx": ["\\sbingbot\\/(\\d+\\.\\d+);"],
"url": "http://www.bing.com/bingbot.htm"
},
{"id": "googlebot",
"n": "GoogleBot",
"r": ["Googlebot"],
"rx": ["Googlebot\\/(\\d+\\.\\d+)", "Googlebot-Image\\/(\\d+\\.\\d+)"],
"url": "http://www.google.com/bot.html"
},
{"id": "googleads",
"n": "Google Ads",
"r": ["AdsBot-Google", "AdsBot-Google-Mobile", "Mediapartners-Google"],
"rx": ["AdsBot-Google;","AdsBot-Google-Mobile;", "Mediapartners-Google\\/(\\d+\\.\\d+);"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
},
{"id": "googleapi",
"n": "Google API Crawler",
"r": ["APIs-Google"],
"rx": ["APIs-Google"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
},
{"id": "googleother",
"n": "GoogleOther",
"r": ["GoogleOther"],
"rx": ["\\sGoogleOther(\\-\\w+)?[\\)\\/]"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#googleother"
},
{"id": "googinspct",
"n": "Google-InspectionTool",
"r": ["Google-InspectionTool"],
"rx": ["\\sGoogle-InspectionTool(\\-\\w+)?[\\)\\/]"],
"url": "https://support.google.com/webmasters/answer/9012289"
},
{"id": "goognblm",
"n": "Google NotebookLM User",
"r": ["Google-NotebookLM"],
"rx": ["Google-NotebookLM"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-user-triggered-fetchers"
},
{"id": "applebot",
"n": "Applebot",
"r": ["Applebot", "Applebot-Extended"],
"rx": ["Applebot\\/(\\d+\\.\\d+);", "Applebot\\-Extended\\/(\\d+\\.\\d+);"],
"url": "http://www.apple.com/go/applebot"
},
{"id": "baidu",
"n": "Baidu Bots",
"r": ["Baiduspider", "Baiduspider-image", "Baiduspider-video", "Baiduspider-news", "Baiduspider-favo", "Baiduspider-cpro", "Baiduspider-ads", "Baiduspider-render"],
"rx": ["Baiduspider"],
"url": "http://www.baidu.com/search/spider.html"
},
{"id": "reddit",
"n": "RedditBot",
"r": ["RedditBot"],
"rx": [" redditbot\\/(\\d+\\.\\d+);"],
"url": "http://www.reddit.com/feedback"
},
{"id": "slurp",
"n": "Slurp (Yahoo!)",
"r": ["Slurp"],
"rx": ["Slurp[\\s;\\)]"],
"url": "http://help.yahoo.com/help/us/ysearch/slurp"
},
{"id": "yahoolp",
"n": "Yahoo! Link Preview (User)",
"r": ["Yahoo Link Preview"],
"rx": ["Yahoo Link Preview;"],
"url": "https://help.yahoo.com/kb/SLN23615.html"
},
{"id": "ddg",
"n": "DuckDuck Bot",
"r": ["DuckDuckBot","DuckAssistBot","DuckDuckGo-Favicons-Bot"],
"rx": ["DuckDuckBot\\/(\\d+\\.\\d+);", "DuckAssistBot\\/(\\d+\\.\\d+);", "DuckDuckGo-Favicons-Bot\\/(\\d+\\.\\d+);"],
"url": "https://duckduckgo.com/duckduckbot.html"
},
{"id": "marginalia",
"n": "Marginalia.nu",
"r": [],
"rx": ["^search\\.marginalia\\.nu"],
"url": "https://marginalia-search.com/"
},
{"id": "openai",
"n": "OpenAI/ChatGPT",
"r": ["OAI-SearchBot", "GPTBot"],
"rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"],
"url": "https://platform.openai.com/docs/bots/"
},
{"id": "chatgpt",
"n": "ChatGPT User",
"r": ["ChatGPT-User"],
"rx": ["ChatGPT\\-User\\/(\\d\\.\\d);"],
"url": "https://platform.openai.com/docs/bots/"
},
{"id": "claude",
"n": "Anthropic Claude",
"r": ["ClaudeBot", "Claude-SearchBot"],
"rx": ["ClaudeBot\\/(\\d+\\.\\d+);", "Claude-SearchBot"],
"url": "https://support.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
},
{"id": "claudeusr",
"n": "ClaudeAI User",
"r": ["Claude-User"],
"rx": ["Claude-User\\/(\\d+\\.\\d+);"],
"url": "https://support.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
},
{"id": "perplexity",
"n": "Perplexity Bot",
"r": ["PerplexityBot"],
"rx": ["PerplexityBot\\/(\\d+\\.\\d+);"],
"url": "https://perplexity.ai/perplexitybot"
},
{"id": "perplexityusr",
"n": "Perplexity User",
"r": ["Perplexity-User"],
"rx": ["Perplexity\\-User\\/(\\d+\\.\\d+);"],
"url": "https://perplexity.ai/perplexitybot"
},
{"id": "metauser",
"n": "Meta/Facebook User",
"r": ["facebookexternalhit","facebookcatalog"],
"rx": ["facebookexternalhit\\/(\\d+\\.?\\d*)", "facebookcatalog\\/(\\d\\.?\\d*)"],
"url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
},
{"id": "metabots",
"n": "Meta/Facebook",
"r": ["meta-webindexer","meta-externalads","meta-externalagent"],
"rx": ["facebook[cw]\\w+\\/(\\d+\\.?\\d*)", "meta\\-[cw]\\w+\\/(\\d+\\.?\\d*)", "meta-externalads\\/(\\d+\\.?\\d*)", "meta-externalagent\\/(\\d+\\.?\\d*)"],
"url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
},
{"id": "qwant",
"n": "Qwant Web crawler",
"r": ["Qwantbot", "Qwantbot-news"],
"rx": ["Qwantbot\\/(\\d+\\.\\d+)[_;\\s]"],
"url": "https://help.qwant.com/bot/"
},
{"id": "yandex",
"n": "Yandex",
"r": ["YandexBot", "YandexAdNet", "YandexBlogs", "YandexImages", "YandexImageResizer", "YandexMarket", "YandexMedia", "YandexOntoDB", "YandexSitelinks","YandexSpravBot", "YandexVertis", "YandexVerticals", "YandexVideo", "YandexWebmaster", "YandexComBot"],
"rx": ["Yandex\\w+\\/(\\d+\\.\\d+);"],
"url": "http://yandex.com/bots"
},
{"id": "seznambot",
"n": "SeznamBot",
"r": ["SeznamBot"],
"rx": ["SeznamBot\\/(\\d+\\.\\d+);"],
"url": "https://o-seznam.cz/napoveda/vyhledavani/en/seznambot-crawler/"
},
{"id": "ahrefs",
"n": "Ahrefs",
"r": ["AhrefsBot", "AhrefsSiteAudit"],
"rx": ["AhrefsBot\\/(\\d+\\.\\d+);", "AhrefsSiteAudit\\/(\\d+\\.\\d+);"],
"url": "https://ahrefs.com/robot/"
},
{"id": "ccbot",
"n": "CommonCrawl Bot",
"r": ["CCBot"],
"rx": ["CCBot\\/(\\d+\\.\\d+)[\\s\\.;]*"],
"url": "https://commoncrawl.org/bot.html"
},
{"id": "mjbot",
"n": "Majestic Crawler (UK)",
"r": ["MJ12bot"],
"rx": ["MJ12bot\\/v?(\\d+\\.\\d+)[\\s\\.;]"],
"url": "http://www.majestic12.co.uk/bot.php"
},
{"id": "petal",
"n": "PetalSearch Bot (CN)",
"r": ["PetalBot", "AspiegelBot"],
"rx": ["[\\s;]PetalBot[\\s\\/;]", "AspiegelBot(?:\\)|$)"],
"url": "https://webmaster.petalsearch.com/site/petalbot"
},
{"id": "barkrowler",
"n": "Barkrowler (Babbar)",
"r": ["Barkrowler"],
"rx": ["(?:^|[\\s;])Barkrowler\\/(\\d+\\.\\d+)?"],
"url": "https://babbar.tech/crawler"
},
{"id": "semrush",
"n": "SemrushBot",
"r": ["SemrushBot"],
"rx": ["SemrushBot-?[A-Z]*\\/(\\d+\\.?\\d*)[;~\\s]"],
"url": "http://www.semrush.com/bot.html"
},
{"id": "bytespider",
"n": "Bytespider (ByteDance, TikTok)",
"r": ["Bytespider"],
"rx": ["Bytespider(?:;|$)"],
"url": "https://darkvisitors.com/agents/bytespider"
},
{"id": "dfseo",
"n": "DataForSEO Bot",
"r": ["DataForSeoBot"],
"rx": ["DataForSeoBot[;\\/](\\d\\.\\d)?"],
"url": "https://dataforseo.com/dataforseo-bot"
},
{"id": "hunter",
"n": "Hunter (Velen) Crawler",
"r": ["VelenPublicWebCrawler"],
"rx": ["VelenPublicWebCrawler\\/(\\d\\.\\d)"],
"url": "https://velen.io"
},
{"id": "serpstat",
"n": "Serpstat Bot",
"r": ["serpstatbot"],
"rx": ["serpstatbot\\/(\\d\\.\\d)\\s"],
"url": "https://serpstatbot.com/"
},
{"id": "netestate",
"n": "netEstate NE Crawler",
"r": ["netEstate NE Crawler"],
"rx": ["netEstate NE Crawler\\s"],
"url": "http://www.website-datenbank.de/"
},
{"id": "imagesift",
"n": "Hive Imagesift Bot (AI Scraper)",
"r": ["ImagesiftBot"],
"rx": ["ImagesiftBot[;\\s\\/]"],
"url": "https://darkvisitors.com/agents/imagesiftbot"
},
{"id": "moz",
"n": "Moz.com Bots",
"r": ["dotbot", "rogerbot"],
"rx": ["DotBot\\/(\\d\\.\\d)[\\.;\\s]", "RogerBot\\/(\\d\\.\\d)[\\.;\\s]"],
"url": "https://moz.com/help/moz-procedures/crawlers/dotbot"
},
{"id": "maui",
"n": "MauiBot (unknown developer)",
"r": ["MauiBot"],
"rx": ["MauiBot\\s"],
"url": "https://darkvisitors.com/agents/mauibot"
},
{"id": "plagaware",
"n": "PlagAwareBot (DE)",
"r": ["PlagAwareBot"],
"rx": ["PlagAwareBot\\/(\\d+\\.\\d+)"],
"url": "https://www.plagaware.com/bot"
},
{"id": "discord",
"n": "Discord User",
"r": ["Discordbot"],
"rx": ["Discordbot\\/(\\d\\.\\d);"],
"url": "https://datadome.co/bots/discordbot/"
},
{"id": "twitter",
"n": "Twitter User",
"r": ["Twitterbot"],
"rx": ["^Twitterbot\\/(\\d\\.?\\d*)"],
"url": "https://datadome.co/bots/twitterbot/"
},
{"id": "whatsapp",
"n": "WhatsApp User",
"r": ["WhatsApp"],
"rx": ["WhatsApp\\/(\\d+\\.?\\d*)"],
"url": "https://darkvisitors.com/agents/whatsapp/"
},
{"id": "tiktok",
"n": "TikTok User",
"r": ["TikTokSpider"],
"rx": ["TikTokSpider;"],
"url": "https://darkvisitors.com/agents/tiktokspider"
},
{"id": "blexbot",
"n": "BLEXBot",
"r": ["BLEXBot"],
"rx": ["BLEXBot\\/(\\d\\.\\d);"],
"url": "https://help.seranking.com/en/blex-crawler"
},
{"id": "sogou",
"n": "Sogou web spider",
"r": ["Sogou web spider"],
"rx": ["Sogou web spider\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "http://www.sogou.com/docs/help/webmasters.htm#07"
},
{"id": "amazon",
"n": "Amazonbot",
"r": ["Amazonbot"],
"rx": ["Amazonbot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "https://developer.amazon.com/amazonbot"
},
{"id": "sop",
"n": "SitesOverPagesBot",
"r": ["SitesOverPagesBot"],
"rx": ["SitesOverPagesBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "https://www.sitesoverpages.com/bot"
},
{"id": "startme",
"n": "StartmeBot",
"r": ["StartmeBot"],
"rx": ["StartmeBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "https://support.start.me/en/articles/9182933-what-is-startmebot"
},
{"id": "bitsight",
"n": "BitSightBot",
"r": ["BitSightBot"],
"rx": ["BitSightBot\\/(\\d+\\.\\d+)"],
"url": "https://datadome.co/bots/bitsightbot/"
},
{"id": "terracotta",
"n": "TerraCotta Bot",
"r": ["TerraCotta"],
"rx": ["TerraCotta"],
"url": "https://github.com/CeramicTeam/CeramicTerracotta"
},
{"id": "halobot",
"n": "HaloBot",
"r": ["HaloBot"],
"rx": [" HaloBot\\/(\\d+)\\."],
"url": "https://www.haloscan.com/"
},
{"id": "wayback",
"n": "Internet Archive",
"r": ["archive.org_bot"],
"rx": [" archive\\.org_bot "],
"url": "https://archive.org/details/archive.org_bot"
},
{"id": "alibaba",
"n": "Alibaba AliyunSec Bot",
"r": ["AliyunSecBot"],
"rx": ["AliyunSecBot\\/Aliyun"],
"url": "https://www.aqtronix.com/useragents/?Action=ShowAgentDetails&Name=AliyunSecBot"
},
{"id": "ibou",
"n": "Ibou Bot",
"r": ["IbouBot"],
"rx": ["IbouBot\\/(\\d+\\.\\d+)"],
"url": "https://ibou.io/iboubot.html"
},
{"id": "bnl",
"n": "BnL Harvester",
"r": [],
"rx": ["NLUX_IAHarvester\\/(\\d+)"],
"url": "http://crawl.bnl.lu/"
},
{"id": "turnitin",
"n": "TurnitinBot",
"r": [],
"rx": ["Turnitin\\s"],
"url": "https://www.turnitin.com/robot/crawlerinfo.html"
}
]