More known bots

This commit is contained in:
Sascha Leib
2025-09-05 23:03:22 +02:00
parent 395cd26692
commit 13592cace5
7 changed files with 188 additions and 40 deletions

View File

@@ -1,24 +1,23 @@
[
{"id": "bingbot",
"n": "Bingbot",
[ {"id": "bingbot",
"n": "Bing Bot",
"r": ["bingbot"],
"rx": ["\\sbingbot\\/(\\d+\\.\\d+);"],
"url": "http://www.bing.com/bingbot.htm"
},
{"id": "googlebot",
"n": "GoogleBot",
"n": "Google Bot",
"r": ["Googlebot"],
"rx": ["\\sGooglebot\\/(\\d+\\.\\d+);"],
"url": "http://www.google.com/bot.html"
},
{"id": "googleads",
"n": "Google AdsBot",
"n": "Google Ads Bot",
"r": ["AdsBot-Google", "AdsBot-Google-Mobile", "Mediapartners-Google"],
"rx": ["AdsBot-Google;","AdsBot-Google-Mobile;", "Mediapartners-Google\\/(\\d+\\.\\d+);"],
"url": "http://www.google.com/mobile/adsbot.html"
},
{"id": "googleapi",
"n": "Google APIs",
"n": "Google API Crawler",
"r": ["APIs-Google"],
"rx": ["APIs-Google"],
"url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
@@ -29,28 +28,148 @@
"rx": ["Applebot\\/(\\d+\\.\\d+);"],
"url": "http://www.apple.com/go/applebot"
},
{"id": "slurp",
"n": "Slurp (Yahoo!)",
"r": ["Slurp"],
"rx": ["Slurp[\\s;\\)]"],
"url": "http://help.yahoo.com/help/us/ysearch/slurp"
},
{"id": "ddg",
"n": "DuckDuckGo Bots",
"r": ["DuckDuckBot","DuckAssistBot","DuckDuckGo-Favicons-Bot"],
"rx": ["DuckDuckBot\\/(\\d+\\.\\d+);", "DuckAssistBot\\/(\\d+\\.\\d+);", "DuckDuckGo-Favicons-Bot\\/(\\d+\\.\\d+);"],
"url": "https://duckduckgo.com/duckduckbot.html"
},
{"id": "openai",
"n": "OpenAI/ChatGPT",
"n": "OpenAI/ChatGPT Bots",
"r": ["OAI-SearchBot", "ChatGPT-User", "GPTBot"],
"rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "ChatGPT-User\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"],
"url": "https://platform.openai.com/docs/bots/"
},
{"id": "claude",
"n": "Anthropic Claude Bots",
"r": ["ClaudeBot", "Claude-User", "Claude-SearchBot"],
"rx": ["ClaudeBot\\/(\\d+\\.\\d+);"],
"url": "https://darkvisitors.com/agents/claudebot"
},
{"id": "perplexity",
"n": "Perplexity Crawlers",
"r": ["PerplexityBot", "PerplexityUser"],
"rx": ["PerplexityBot\\/(\\d+\\.\\d+);", "PerplexityUser\\/(\\d+\\.\\d+);"],
"url": "https://perplexity.ai/perplexitybot"
},
{"id": "metabots",
"n": "Meta/Facebook",
"n": "Meta/Facebook Bots",
"r": ["facebookexternalhit", "facebookcatalog","meta-webindexer","meta-externalads","meta-externalagent","meta-externalfetcher"],
"rx": ["facebook\\w+\\/(\\d+\\.\\d+)", "meta-\\w+\\/(\\d+\\.\\d+)"],
"url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
},
{"id": "yandexbots",
"n": "Yandex (Russia)",
{"id": "qwant",
"n": "Qwant Web crawler",
"r": ["Qwantbot", "Qwantbot-news"],
"rx": ["Qwantbot\\/(\\d+\\.\\d+)[_;\\s]"],
"url": "https://help.qwant.com/bot/"
},
{"id": "yandex",
"n": "Yandex Bots",
"r": ["YandexBot", "YandexAdNet", "YandexBlogs", "YandexImages", "YandexImageResizer", "YandexMarket", "YandexMedia", "YandexOntoDB", "YandexSitelinks","YandexSpravBot", "YandexVertis", "YandexVerticals", "YandexVideo", "YandexWebmaster", "YandexComBot"],
"rx": ["Yandex\\w+\\/(\\d+\\.\\d+);"],
"url": "http://yandex.com/bots"
},
{"id": "seznambot",
"n": "SeznamBot (Czech)",
"n": "SeznamBot (CZ)",
"r": ["SeznamBot"],
"rx": ["SeznamBot\\/(\\d+\\.\\d+);"],
"url": "https://o-seznam.cz/napoveda/vyhledavani/en/seznambot-crawler/"
},
{"id": "ahrefs",
"n": "Ahrefs Bots (SEO/marketing)",
"r": ["AhrefsBot", "AhrefsSiteAudit"],
"rx": ["AhrefsBot\\/(\\d+\\.\\d+);", "AhrefsSiteAudit\\/(\\d+\\.\\d+);"],
"url": "https://ahrefs.com/robot/"
},
{"id": "ccbot",
"n": "Common Crawl Bot (AI-Scraper)",
"r": ["CCBot"],
"rx": ["CCBot\\/(\\d+\\.\\d+)[\\s\\.;]*"],
"url": "https://commoncrawl.org/bot.html"
},
{"id": "mjbot",
"n": "Majestic Crawler (UK)",
"r": ["MJ12bot"],
"rx": ["MJ12bot\\/v?(\\d+\\.\\d+)[\\s\\.;]"],
"url": "http://www.majestic12.co.uk/bot.php"
},
{"id": "petal",
"n": "PetalSearch Bot (China)",
"r": ["PetalBot", "AspiegelBot"],
"rx": ["[\\s;]PetalBot[\\s\\/;]", "AspiegelBot[\\)$]"],
"url": "https://webmaster.petalsearch.com/site/petalbot"
},
{"id": "barkrowler",
"n": "Barkrowler (Babbar Bot)",
"r": ["Barkrowler"],
"rx": ["[\\s;^]Barkrowler\\/(\\d+\\.\\d+)?"],
"url": "https://babbar.tech/crawler"
},
{"id": "semrush",
"n": "SemrushBot",
"r": ["SemrushBot"],
"rx": ["SemrushBot-?[A-Z]*\\/(\\d+\\.?\\d*)[;~\\s]"],
"url": "http://www.semrush.com/bot.html"
},
{"id": "bytespider",
"n": "Bytespider (ByteDance, TikTok)",
"r": ["Bytespider"],
"rx": ["Bytespider[;$]"],
"url": "https://darkvisitors.com/agents/bytespider"
},
{"id": "dfseo",
"n": "DataForSEO Bot",
"r": ["DataForSeoBot"],
"rx": ["DataForSeoBot[;\\/](\\d\\.\\d)?"],
"url": "https://dataforseo.com/dataforseo-bot"
},
{"id": "hunter",
"n": "Hunter (Velen) Crawler",
"r": ["VelenPublicWebCrawler"],
"rx": ["VelenPublicWebCrawler\\/(\\d\\.\\d)"],
"url": "https://velen.io"
},
{"id": "serpstat",
"n": "Serpstat Bot",
"r": ["serpstatbot"],
"rx": ["serpstatbot\\/(\\d\\.\\d)\\s"],
"url": "https://serpstatbot.com/"
},
{"id": "netestate",
"n": "netEstate NE Crawler (DE)",
"r": ["netEstate NE Crawler"],
"rx": ["netEstate NE Crawler\\s"],
"url": "http://www.website-datenbank.de/"
},
{"id": "imagesift",
"n": "Hive Imagesift Bot (AI Scraper)",
"r": ["ImagesiftBot"],
"rx": ["ImagesiftBot[;\\s\\/]"],
"url": "https://darkvisitors.com/agents/imagesiftbot"
},
{"id": "dotbot",
"n": "Moz.com DotBot Crawler",
"r": ["ImagesiftBot"],
"rx": ["DotBot\\/(\\d\\.\\d)[\\.;\\s]"],
"url": "https://moz.com/help/moz-procedures/crawlers/dotbot"
},
{"id": "maui",
"n": "MauiBot (unknown developer)",
"r": ["MauiBot"],
"rx": ["MauiBot\\s"],
"url": "https://darkvisitors.com/agents/mauibot"
},
{"id": "plagaware",
"n": "PlagAwareBot (DE)",
"r": ["PlagAwareBot"],
"rx": ["PlagAwareBot\\/(\\d\\.\\d)"],
"url": "https://www.plagaware.com/bot"
}
]

View File

@@ -21,7 +21,7 @@
},
{"n": "Old MacOS",
"id": "macosold",
"rx": [ "\\sMac OS X 10[\\._](\\d|1[0-3])[\\._;\\s\\)]" ]
"rx": [ "\\sMac OS X 10[\\._](\\d|1[0-3])[\\._;\\s\\)]", "\\sMac OS X (1[12])[\\._]" ]
},
{"n": "MacOS",
"id": "macos",

View File

@@ -1,24 +1,24 @@
{
"threshold": 100,
"rules": [
{"func": "obsoleteClient",
{"func": "obsoleteClient", "params": ["aol", "msie", "chromeold"],
"id": "oldClient", "desc": "Obsolete browser version",
"bot": 40
},
{"func": "obsoletePlatform",
{"func": "obsoletePlatform", "params": ["winold", "macosold"],
"id": "oldOS", "desc": "Obsolete platform version",
"bot": 40
},
{"func": "noJavaScript",
"id": "noJS", "desc": "JavaScript disabled",
"bot": 20
"bot": 40
},
{"func": "smallPageCount", "params": [1],
"id": "onePage", "desc": "Views only a single page",
"bot": 20
"id": "onePage", "desc": "Visiter loaded only a single page",
"bot": 40
},
{"func": "noTicks",
"id": "noTicks", "desc": "Visitor did not spend time reading any page",
"id": "noTicks", "desc": "Visitor did not spend time reading a page",
"bot": 10
},
{"func": "noReferences",

BIN
img/plagaware.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

1
img/yahoo.svg Normal file
View File

@@ -0,0 +1 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg width="100%" height="100%" viewBox="0 0 24 24" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:2;"><path d="M12,0c6.628,0 12,5.373 12,12c-0,6.628 -5.372,12 -12,12c-6.628,-0 -12,-5.372 -12,-12c0,-6.627 5.372,-12 12,-12Z" style="fill:#720e9e;fill-rule:nonzero;"/><path d="M17.494,9.473l0.013,-0.003l0.398,-0.442l-0.02,-0.002l0.042,-0.066l-5.569,0l0.215,0.762l1.52,0l-2.558,2.367c-0.522,-0.762 -1.745,-2.511 -2.599,-3.824l1.586,0l0,-0.54l0.022,-0.155l-0.022,-0.003l0,-0.063l-6.021,-0l-0,0.761l1.883,0c0.731,0.604 3.901,4.453 4.017,4.82c0.046,0.345 0.113,2.376 -0.024,2.529c-0.164,0.237 -1.88,0.109 -2.182,0.132l-0.011,0.746c0.552,0.016 2.208,-0.001 2.767,-0.001c1.101,0 3.029,-0.028 3.3,-0.007l0.034,-0.706l-2.216,-0.033c-0.046,-0.321 -0.097,-2.342 -0.051,-2.562c0.209,-0.64 3.61,-3.288 3.959,-3.387l0.33,-0.074l1.027,-0c-0,-0.001 0.16,-0.249 0.16,-0.249Zm-0.831,5.28l0.963,0.074l1.123,-4.118c-0.187,-0.007 -1.891,-0.159 -2.112,-0.2l0.026,4.244Zm-0.166,0.66l0.005,1.006l0.488,0.041l0.526,0.036l0.155,-0.984l-0.555,-0.025c-0,-0 -0.619,-0.074 -0.619,-0.074Z" style="fill:#fff;fill-rule:nonzero;"/></svg>

After

Width:  |  Height:  |  Size: 1.4 KiB

View File

@@ -226,7 +226,7 @@ BotMon.live = {
if ( v.id == visitor.id) { /* match the pre-defined IDs */
return v;
} else if (v.ip == visitor.ip && v.agent == visitor.agent) {
console.info("Visitor ID not found, using matchin IP + User-Agent instead.");
console.warn("Visitor ID not found, using matchin IP + User-Agent instead.");
return v;
}
@@ -683,7 +683,6 @@ BotMon.live = {
const json = await response.json();
if (json.rules) {
console.log(json.rules);
this._rulesList = json.rules;
}
@@ -759,19 +758,17 @@ BotMon.live = {
func: {
// check if client is one of the obsolete ones:
obsoleteClient: function(visitor) {
obsoleteClient: function(visitor, ...clients) {
const obsClients = ['aol', 'msie', 'chromeold'];
const clientId = ( visitor._client ? visitor._client.id : '');
return obsClients.includes(clientId);
return clients.includes(clientId);
},
// check if OS/Platform is one of the obsolete ones:
obsoletePlatform: function(visitor) {
obsoletePlatform: function(visitor, ...platforms) {
const obsPlatforms = ['winold', 'macosold'];
const platformId = ( visitor._platform ? visitor._platform.id : '');
return obsPlatforms.includes(platformId);
const pId = ( visitor._platform ? visitor._platform.id : '');
return platforms.includes(pId);
},
// client does not use JavaScript:
@@ -799,7 +796,7 @@ BotMon.live = {
},
loadLogFile: async function(type, onLoaded = undefined) {
console.info('BotMon.live.data.loadLogFile(',type,')');
//console.info('BotMon.live.data.loadLogFile(',type,')');
let typeName = '';
let columns = [];
@@ -911,7 +908,7 @@ BotMon.live = {
<dd><span>Bounce rate (est.):</span><span>${bounceRate}%</span></dd>
</dl>
<dl>
<dt>Bots vs. Humans</dt>
<dt>Bots vs. Humans (page views)</dt>
<dd><span>Registered users:</span><strong>${data.bots.users}</strong></dd>
<dd><span>Probably humans:</span><strong>${data.bots.human}</strong></dd>
<dd><span>Suspected bots:</span><strong>${data.bots.suspected}</strong></dd>
@@ -924,7 +921,7 @@ BotMon.live = {
// update known bots list:
const block = document.getElementById('botmon__botslist');
block.innerHTML = "<dt>Top known bots</dt>";
block.innerHTML = "<dt>Top known bots (page views)</dt>";
let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => {
return b._pageViews.length - a._pageViews.length;
@@ -989,6 +986,9 @@ BotMon.live = {
lists: {
init: function() {
// function shortcut:
const makeElement = BotMon.t._makeElement;
const parent = document.getElementById('botmon__today__visitorlists');
if (parent) {
@@ -1017,15 +1017,16 @@ BotMon.live = {
default:
console.warn('Unknwon list number.');
}
let group = BotMon.live.data.analytics.groups[listId];
let gCount = ''
const details = BotMon.t._makeElement('details', {
const details = makeElement('details', {
'data-group': listId,
'data-loaded': false
});
details.appendChild(BotMon.t._makeElement('summary',
undefined,
listTitle
));
const title = details.appendChild(makeElement('summary'));
title.appendChild(makeElement('span', {'class':'title'}, listTitle))
title.appendChild(makeElement('span', {'class':'counter'}, gCount))
details.addEventListener("toggle", this._onDetailsToggle);
parent.appendChild(details);
@@ -1192,6 +1193,8 @@ BotMon.live = {
visitTimeStr = Math.floor(visitDuration / 1000) + "s";
}
console.log(page);
pgLi.appendChild(make('span', {}, page.pg));
// pgLi.appendChild(make('span', {}, page.ref));
pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount));
@@ -1209,7 +1212,6 @@ BotMon.live = {
'class': 'eval'
});
data._eval.forEach( (test) => {
console.log(test);
const tObj = BotMon.live.data.rules.getRuleInfo(test);
const tDesc = tObj ? tObj.desc : test;

View File

@@ -106,8 +106,13 @@
border-radius: 0 0 .25rem .25rem;
}
& details summary {
background-color: transparent;
border: transparent none 0;
& {
background-color: transparent;
border: transparent none 0;
}
span.title {
flex-grow: 1;
}
}
}
& > details > summary {
@@ -142,7 +147,7 @@
line-height: 1.5;
background-color: #F0F0F0;
border-bottom: #CCC solid 1px;
border-radius: .5em;
border-radius: .7em;
}
details ul > li > details > summary > span {
@@ -159,6 +164,7 @@
column-gap: .25em;
height: 1.5em;
overflow: hidden;
margin-right: .2rem;
}
details ul > li > details > summary > span > span::before {
content: '';
@@ -235,10 +241,30 @@
span.bot_googleads::before, dd.bot_googleads::before,
span.bot_googleapi::before, dd.bot_googleapi::before { background-image: url('img/google.svg') }
span.bot_applebot::before, dd.bot_applebot::before { background-image: url('img/apple.svg') }
span.bot_slurp::before, dd.bot_slurp::before { background-image: url('img/yahoo.svg') }
span.bot_ddg::before, dd.bot_ddg::before { background-image: url('img/ddg.svg') }
span.bot_openai::before, dd.bot_openai::before { background-image: url('img/openai.svg') }
span.bot_claude::before, dd.bot_claude::before { background-image: url('img/anthropic.png') }
span.bot_perplexity::before, dd.bot_perplexity::before { background-image: url('img/perplexity.svg') }
span.bot_metabots::before, dd.bot_metabots::before { background-image: url('img/meta.svg') }
span.bot_yandexbots::before, dd.bot_yandexbots::before { background-image: url('img/yandex.svg') }
span.bot_qwant::before, dd.bot_qwant::before { background-image: url('img/qwant.svg') }
span.bot_yandex::before, dd.bot_yandex::before { background-image: url('img/yandex.svg') }
span.bot_seznambot::before, dd.bot_seznambot::before { background-image: url('img/seznam.svg') }
span.bot_ahrefs::before, dd.bot_ahrefs::before { background-image: url('img/ahrefs.png') }
span.bot_ccbot::before, dd.bot_ccbot::before { background-image: url('img/ccbot.png') }
span.bot_mjbot::before, dd.bot_mjbot::before { background-image: url('img/majestic.png') }
span.bot_petal::before, dd.bot_petal::before { background-image: url('img/petal.svg') }
span.bot_barkrowler::before, dd.bot_barkrowler::before { background-image: url('img/babbar.png') }
span.bot_semrush::before, dd.bot_semrush::before { background-image: url('img/semrush.png') }
span.bot_bytespider::before, dd.bot_bytespider::before { background-image: url('img/bytedance.svg') }
span.bot_dfseo::before, dd.bot_dfseo::before { background-image: url('img/dataforseo.png') }
span.bot_hunter::before, dd.bot_hunter::before { background-image: url('img/hunter.png') }
span.bot_serpstat::before, dd.bot_serpstat::before { background-image: url('img/serpstat.svg') }
span.bot_netestate::before, dd.bot_netestate::before { background-image: url('img/netestate.png') }
span.bot_imagesift::before, dd.bot_imagesift::before { background-image: url('img/hive.svg') }
/*span.bot_dotbot::before, dd.bot_dotbot::before { background-image: url('img/moz.svg') }*/
/*span.bot_maui::before, dd.bot_maui::before { background-image: url('img/maui.svg') }*/
span.bot_plagaware::before, dd.bot_plagaware::before { background-image: url('img/plagaware.png') }
/* user info */
span.user_known::before { background-image: url('img/user.svg') }