Bots touchups

This commit is contained in:
Sascha Leib
2025-10-17 18:04:34 +02:00
parent 20997f50c5
commit f207f6f2a8
7 changed files with 28 additions and 12 deletions

View File

@@ -41,6 +41,10 @@
"id": "susClient", "desc": "Client identifier that is popular with bot networks",
"bot": 10
},
{"func": "matchesClient", "params": ["undici","gohttp"],
"id": "botClient", "desc": "Client identifier indicates web crawler",
"bot": 100
},
{"func": "combinationTest", "params": [["macos","chromeold"],["macos","msie"],["winold","edge"],["winold","brave"]],
"id": "suspPC", "desc": "Suspicious combination of platform and client",
"bot": 30

View File

@@ -78,8 +78,8 @@
},
{"id": "openai",
"n": "OpenAI/ChatGPT", "geo": "US",
"r": ["OAI-SearchBot", "ChatGPT-User", "GPTBot"],
"rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "ChatGPT-User\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"],
"r": ["OAI-SearchBot", "GPTBot"],
"rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"],
"url": "https://platform.openai.com/docs/bots/"
},
{"id": "claude",
@@ -96,8 +96,8 @@
},
{"id": "metabots",
"n": "Meta/Facebook", "geo": "US",
"r": ["facebookexternalhit", "facebookcatalog","meta-webindexer","meta-externalads","meta-externalagent","meta-externalfetcher"],
"rx": ["facebook\\w+\\/(\\d+\\.\\d+)", "meta-\\w+\\/(\\d+\\.\\d+)"],
"r": ["facebookcatalog","meta-webindexer","meta-externalads","meta-externalagent"],
"rx": ["facebook[cw]\\w+\\/(\\d+\\.\\d+)", "meta-(?:webindexer|externala\\w+)\\/(\\d+\\.\\d+)"],
"url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
},
{"id": "qwant",
@@ -292,12 +292,6 @@
"rx": ["IbouBot\\/(\\d+\\.\\d+)"],
"url": "https://ibou.io/iboubot.html"
},
{"id": "gohttp",
"n": "Go HTTP Clients",
"r": [],
"rx": ["Go\\-http\\-client\\/(\\d+)", "quic\\-go\\-HTTP\\/(\\d+)"],
"url": "https://github.com/golang/go"
},
{"id": "bnl",
"n": "BnL Harvester",
"r": [],

View File

@@ -98,5 +98,21 @@
{"n": "Google NotebookLM",
"id": "goognblm",
"rx": ["Google-NotebookLM"]
},
{"n": "ChatGPT",
"id": "chatgpt",
"rx": ["ChatGPT-User\\/(\\d+\\.\\d+)"]
},
{"n": "Meta Apps",
"id": "meta",
"rx": ["facebookexternalhit\\/(\\d+\\.\\d+)", "meta-externalfetcher\\/(\\d+\\.\\d+)"]
},
{"n": "Undici-based crawler",
"id": "undici",
"rx": ["undici"]
},
{"n": "Go HTTP crawler",
"id": "gohttp",
"rx": ["Go\\-http\\-client\\/(\\d+)", "quic\\-go\\-HTTP\\/(\\d+)"]
}
]

View File

@@ -57,6 +57,6 @@
},
{"n": "App",
"id": "app",
"rx": [ "WhatsApp/(\\d+\\.\\d+)", "Facebot Twitterbot\\/(\\d\\.\\d)", "Google-NotebookLM"]
"rx": [ "WhatsApp/(\\d+\\.\\d+)", "Facebot Twitterbot\\/(\\d\\.\\d)", "Google-NotebookLM", "ChatGPT-User", "meta-externalfetcher"]
}
]