Files
dokuwiki-plugin-botmon/config/known-bots.json
2025-10-03 21:30:29 +02:00

313 lines
9.6 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
[ {"id": "bingbot",
"n": "BingBot", "geo": "US",
"r": ["bingbot"],
"rx": ["\\sbingbot\\/(\\d+\\.\\d+);"],
"url": "http://www.bing.com/bingbot.htm"
},
{"id": "googlebot",
"n": "GoogleBot", "geo": "US",
"r": ["Googlebot"],
"rx": ["Googlebot\\/(\\d+\\.\\d+)", "Googlebot-Image\\/(\\d+\\.\\d+)"],
"url": "http://www.google.com/bot.html"
},
{"id": "googleads",
"n": "Google Ads", "geo": "US",
"r": ["AdsBot-Google", "AdsBot-Google-Mobile", "Mediapartners-Google"],
"rx": ["AdsBot-Google;","AdsBot-Google-Mobile;", "Mediapartners-Google\\/(\\d+\\.\\d+);"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
},
{"id": "googleapi",
"n": "Google API Crawler", "geo": "US",
"r": ["APIs-Google"],
"rx": ["APIs-Google"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
},
{"id": "googleother",
"n": "GoogleOther", "geo": "US",
"r": ["GoogleOther"],
"rx": ["\\sGoogleOther(\\-\\w+)?[\\)\\/]"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#googleother"
},
{"id": "googinspct",
"n": "Google-InspectionTool", "geo": "US",
"r": ["Google-InspectionTool"],
"rx": ["\\sGoogle-InspectionTool(\\-\\w+)?[\\)\\/]"],
"url": "https://support.google.com/webmasters/answer/9012289"
},
{"id": "applebot",
"n": "Applebot", "geo": "US",
"r": ["Applebot"],
"rx": ["Applebot\\/(\\d+\\.\\d+);"],
"url": "http://www.apple.com/go/applebot"
},
{"id": "reddit",
"n": "RedditBot",
"r": ["redditbot"],
"rx": [" redditbot\\/(\\d+\\.\\d+);"],
"url": "http://www.reddit.com/feedback"
},
{"id": "slurp",
"n": "Slurp (Yahoo!)", "geo": "US",
"r": ["Slurp"],
"rx": ["Slurp[\\s;\\)]"],
"url": "http://help.yahoo.com/help/us/ysearch/slurp"
},
{"id": "yahoo",
"n": "Yahoo! Link Preview",
"r": [],
"rx": ["Yahoo Link Preview;"],
"url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html"
},
{"id": "ddg",
"n": "DuckDuck Bot",
"r": ["DuckDuckBot","DuckAssistBot","DuckDuckGo-Favicons-Bot"],
"rx": ["DuckDuckBot\\/(\\d+\\.\\d+);", "DuckAssistBot\\/(\\d+\\.\\d+);", "DuckDuckGo-Favicons-Bot\\/(\\d+\\.\\d+);"],
"url": "https://duckduckgo.com/duckduckbot.html"
},
{"id": "marginalia",
"n": "Marginalia.nu",
"r": [],
"rx": ["^search\\.marginalia\\.nu"],
"url": "https://marginalia-search.com/"
},
{"id": "openai",
"n": "OpenAI/ChatGPT", "geo": "US",
"r": ["OAI-SearchBot", "ChatGPT-User", "GPTBot"],
"rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "ChatGPT-User\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"],
"url": "https://platform.openai.com/docs/bots/"
},
{"id": "claude",
"n": "Anthropic Claude", "geo": "US",
"r": ["ClaudeBot", "Claude-User", "Claude-SearchBot"],
"rx": ["ClaudeBot\\/(\\d+\\.\\d+);", "Claude-User\\/(\\d+\\.\\d+)", "Claude-SearchBot\\/(\\d+\\.\\d+)"],
"url": "https://darkvisitors.com/agents/claudebot"
},
{"id": "perplexity",
"n": "Perplexity", "geo": "US",
"r": ["PerplexityBot", "PerplexityUser"],
"rx": ["PerplexityBot\\/(\\d+\\.\\d+);", "PerplexityUser\\/(\\d+\\.\\d+);"],
"url": "https://perplexity.ai/perplexitybot"
},
{"id": "applemsgs",
"n": "Apple Messages",
"r": [],
"rx": ["Facebot Twitterbot\\/(\\d\\.\\d)"],
"url": "https://developer.apple.com/documentation/technotes/tn3156-create-rich-previews-for-messages"
},
{"id": "metabots",
"n": "Meta/Facebook", "geo": "US",
"r": ["facebookexternalhit", "facebookcatalog","meta-webindexer","meta-externalads","meta-externalagent","meta-externalfetcher"],
"rx": ["facebook\\w+\\/(\\d+\\.\\d+)", "meta-\\w+\\/(\\d+\\.\\d+)"],
"url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
},
{"id": "qwant",
"n": "Qwant Web crawler",
"r": ["Qwantbot", "Qwantbot-news"],
"rx": ["Qwantbot\\/(\\d+\\.\\d+)[_;\\s]"],
"url": "https://help.qwant.com/bot/"
},
{"id": "yandex",
"n": "Yandex", "geo": "RU",
"r": ["YandexBot", "YandexAdNet", "YandexBlogs", "YandexImages", "YandexImageResizer", "YandexMarket", "YandexMedia", "YandexOntoDB", "YandexSitelinks","YandexSpravBot", "YandexVertis", "YandexVerticals", "YandexVideo", "YandexWebmaster", "YandexComBot"],
"rx": ["Yandex\\w+\\/(\\d+\\.\\d+);"],
"url": "http://yandex.com/bots"
},
{"id": "seznambot",
"n": "SeznamBot", "geo": "CZ",
"r": ["SeznamBot"],
"rx": ["SeznamBot\\/(\\d+\\.\\d+);"],
"url": "https://o-seznam.cz/napoveda/vyhledavani/en/seznambot-crawler/"
},
{"id": "ahrefs",
"n": "Ahrefs",
"r": ["AhrefsBot", "AhrefsSiteAudit"],
"rx": ["AhrefsBot\\/(\\d+\\.\\d+);", "AhrefsSiteAudit\\/(\\d+\\.\\d+);"],
"url": "https://ahrefs.com/robot/"
},
{"id": "ccbot",
"n": "CommonCrawl Bot", "geo": "US",
"r": ["CCBot"],
"rx": ["CCBot\\/(\\d+\\.\\d+)[\\s\\.;]*"],
"url": "https://commoncrawl.org/bot.html"
},
{"id": "mjbot",
"n": "Majestic Crawler (UK)", "geo": "GB",
"r": ["MJ12bot"],
"rx": ["MJ12bot\\/v?(\\d+\\.\\d+)[\\s\\.;]"],
"url": "http://www.majestic12.co.uk/bot.php"
},
{"id": "petal",
"n": "PetalSearch Bot (CN)", "geo": "CN",
"r": ["PetalBot", "AspiegelBot"],
"rx": ["[\\s;]PetalBot[\\s\\/;]", "AspiegelBot(?:\\)|$)"],
"url": "https://webmaster.petalsearch.com/site/petalbot"
},
{"id": "barkrowler",
"n": "Barkrowler (Babbar)",
"r": ["Barkrowler"],
"rx": ["(?:^|[\\s;])Barkrowler\\/(\\d+\\.\\d+)?"],
"url": "https://babbar.tech/crawler"
},
{"id": "semrush",
"n": "SemrushBot",
"r": ["SemrushBot"],
"rx": ["SemrushBot-?[A-Z]*\\/(\\d+\\.?\\d*)[;~\\s]"],
"url": "http://www.semrush.com/bot.html"
},
{"id": "bytespider",
"n": "Bytespider (ByteDance, TikTok)", "geo": "CN",
"r": ["Bytespider"],
"rx": ["Bytespider(?:;|$)"],
"url": "https://darkvisitors.com/agents/bytespider"
},
{"id": "dfseo",
"n": "DataForSEO Bot",
"r": ["DataForSeoBot"],
"rx": ["DataForSeoBot[;\\/](\\d\\.\\d)?"],
"url": "https://dataforseo.com/dataforseo-bot"
},
{"id": "hunter",
"n": "Hunter (Velen) Crawler",
"r": ["VelenPublicWebCrawler"],
"rx": ["VelenPublicWebCrawler\\/(\\d\\.\\d)"],
"url": "https://velen.io"
},
{"id": "serpstat",
"n": "Serpstat Bot",
"r": ["serpstatbot"],
"rx": ["serpstatbot\\/(\\d\\.\\d)\\s"],
"url": "https://serpstatbot.com/"
},
{"id": "netestate",
"n": "netEstate NE Crawler", "geo": "DE",
"r": ["netEstate NE Crawler"],
"rx": ["netEstate NE Crawler\\s"],
"url": "http://www.website-datenbank.de/"
},
{"id": "imagesift",
"n": "Hive Imagesift Bot (AI Scraper)",
"r": ["ImagesiftBot"],
"rx": ["ImagesiftBot[;\\s\\/]"],
"url": "https://darkvisitors.com/agents/imagesiftbot"
},
{"id": "moz",
"n": "Moz.com Bots",
"r": ["dotbot", "rogerbot"],
"rx": ["DotBot\\/(\\d\\.\\d)[\\.;\\s]", "RogerBot\\/(\\d\\.\\d)[\\.;\\s]"],
"url": "https://moz.com/help/moz-procedures/crawlers/dotbot"
},
{"id": "maui",
"n": "MauiBot (unknown developer)",
"r": ["MauiBot"],
"rx": ["MauiBot\\s"],
"url": "https://darkvisitors.com/agents/mauibot"
},
{"id": "plagaware",
"n": "PlagAwareBot (DE)", "geo": "DE",
"r": ["PlagAwareBot"],
"rx": ["PlagAwareBot\\/(\\d+\\.\\d+)"],
"url": "https://www.plagaware.com/bot"
},
{"id": "discord",
"n": "Discord",
"r": ["Discordbot"],
"rx": ["Discordbot\\/(\\d\\.\\d);"],
"url": "https://datadome.co/bots/discordbot/"
},
{"id": "twitter",
"n": "Twitterbot",
"r": ["Twitterbot"],
"rx": ["Twitterbot\\/(\\d+\\.\\d+)"],
"url": "https://datadome.co/bots/twitterbot/"
},
{"id": "blexbot",
"n": "BLEXBot",
"r": ["BLEXBot"],
"rx": ["BLEXBot\\/(\\d\\.\\d);"],
"url": "https://help.seranking.com/en/blex-crawler"
},
{"id": "sogou",
"n": "Sogou web spider",
"r": ["Sogou web spider"],
"rx": ["Sogou web spider\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "http://www.sogou.com/docs/help/webmasters.htm#07"
},
{"id": "amazon",
"n": "Amazonbot", "geo": "US",
"r": ["Amazonbot"],
"rx": ["Amazonbot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "https://developer.amazon.com/amazonbot"
},
{"id": "sop",
"n": "SitesOverPagesBot",
"r": ["SitesOverPagesBot"],
"rx": ["SitesOverPagesBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "https://www.sitesoverpages.com/bot"
},
{"id": "startme",
"n": "StartmeBot",
"r": ["StartmeBot"],
"rx": ["StartmeBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
"url": "https://support.start.me/en/articles/9182933-what-is-startmebot"
},
{"id": "bitsight",
"n": "BitSightBot",
"r": ["BitSightBot"],
"rx": ["BitSightBot\\/(\\d+\\.\\d+)"],
"url": "https://datadome.co/bots/bitsightbot/"
},
{"id": "terracotta",
"n": "TerraCotta Bot",
"r": ["TerraCotta"],
"rx": ["TerraCotta"],
"url": "https://github.com/CeramicTeam/CeramicTerracotta"
},
{"id": "tiktok",
"n": "TikTok Spider", "geo": "CN",
"r": ["TikTokSpider"],
"rx": ["TikTokSpider"],
"url": "https://darkvisitors.com/agents/tiktokspider"
},
{"id": "halobot",
"n": "HaloBot", "geo": "FR",
"r": ["HaloBot"],
"rx": [" HaloBot\\/(\\d+)\\."],
"url": "https://www.haloscan.com/"
},
{"id": "wayback",
"n": "Internet Archive",
"r": ["archive.org_bot"],
"rx": [" archive\\.org_bot "],
"url": "https://archive.org/details/archive.org_bot"
},
{"id": "alibaba",
"n": "Alibaba AliyunSec Bot",
"r": ["AliyunSecBot"],
"rx": ["AliyunSecBot\\/Aliyun"],
"url": "https://www.aqtronix.com/useragents/?Action=ShowAgentDetails&Name=AliyunSecBot"
},
{"id": "ibou",
"n": "Ibou Bot",
"r": ["IbouBot"],
"rx": ["IbouBot\\/(\\d+\\.\\d+)"],
"url": "https://ibou.io/iboubot.html"
},
{"id": "gohttp",
"n": "Go HTTP Clients",
"r": [],
"rx": ["Go\\-http\\-client\\/(\\d+)", "quic\\-go\\-HTTP\\/(\\d+)"],
"url": "https://github.com/golang/go"
},
{"id": "bnl",
"n": "BnL Harvester",
"r": [],
"rx": ["NLUX_IAHarvester\\/(\\d+)"],
"url": "http://crawl.bnl.lu/"
},
{"id": "turnitin",
"n": "TurnitinBot",
"r": [],
"rx": ["Turnitin\\s"],
"url": "https://www.turnitin.com/robot/crawlerinfo.html"
}
]