diff --git a/data/known-bots.json b/data/known-bots.json index e3d9ec0..2f0c760 100644 --- a/data/known-bots.json +++ b/data/known-bots.json @@ -1,24 +1,23 @@ -[ - {"id": "bingbot", - "n": "Bingbot", +[ {"id": "bingbot", + "n": "Bing Bot", "r": ["bingbot"], "rx": ["\\sbingbot\\/(\\d+\\.\\d+);"], "url": "http://www.bing.com/bingbot.htm" }, {"id": "googlebot", - "n": "GoogleBot", + "n": "Google Bot", "r": ["Googlebot"], "rx": ["\\sGooglebot\\/(\\d+\\.\\d+);"], "url": "http://www.google.com/bot.html" }, {"id": "googleads", - "n": "Google AdsBot", + "n": "Google Ads Bot", "r": ["AdsBot-Google", "AdsBot-Google-Mobile", "Mediapartners-Google"], "rx": ["AdsBot-Google;","AdsBot-Google-Mobile;", "Mediapartners-Google\\/(\\d+\\.\\d+);"], "url": "http://www.google.com/mobile/adsbot.html" }, {"id": "googleapi", - "n": "Google APIs", + "n": "Google API Crawler", "r": ["APIs-Google"], "rx": ["APIs-Google"], "url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers" @@ -29,28 +28,148 @@ "rx": ["Applebot\\/(\\d+\\.\\d+);"], "url": "http://www.apple.com/go/applebot" }, + {"id": "slurp", + "n": "Slurp (Yahoo!)", + "r": ["Slurp"], + "rx": ["Slurp[\\s;\\)]"], + "url": "http://help.yahoo.com/help/us/ysearch/slurp" + }, + {"id": "ddg", + "n": "DuckDuckGo Bots", + "r": ["DuckDuckBot","DuckAssistBot","DuckDuckGo-Favicons-Bot"], + "rx": ["DuckDuckBot\\/(\\d+\\.\\d+);", "DuckAssistBot\\/(\\d+\\.\\d+);", "DuckDuckGo-Favicons-Bot\\/(\\d+\\.\\d+);"], + "url": "https://duckduckgo.com/duckduckbot.html" + }, {"id": "openai", - "n": "OpenAI/ChatGPT", + "n": "OpenAI/ChatGPT Bots", "r": ["OAI-SearchBot", "ChatGPT-User", "GPTBot"], "rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "ChatGPT-User\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"], "url": "https://platform.openai.com/docs/bots/" }, + {"id": "claude", + "n": "Anthropic Claude Bots", + "r": ["ClaudeBot", "Claude-User", "Claude-SearchBot"], + "rx": ["ClaudeBot\\/(\\d+\\.\\d+);"], + "url": 
"https://darkvisitors.com/agents/claudebot" + }, + {"id": "perplexity", + "n": "Perplexity Crawlers", + "r": ["PerplexityBot", "Perplexity‑User"], + "rx": ["PerplexityBot\\/(\\d+\\.\\d+);", "Perplexity‑User\\/(\\d+\\.\\d+);"], + "url": "https://perplexity.ai/perplexitybot" + }, {"id": "metabots", - "n": "Meta/Facebook", + "n": "Meta/Facebook Bots", "r": ["facebookexternalhit", "facebookcatalog","meta-webindexer","meta-externalads","meta-externalagent","meta-externalfetcher"], "rx": ["facebook\\w+\\/(\\d+\\.\\d+)", "meta-\\w+\\/(\\d+\\.\\d+)"], "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler" }, - {"id": "yandexbots", - "n": "Yandex (Russia)", + {"id": "qwant", + "n": "Qwant Web crawler", + "r": ["Qwantbot", "Qwantbot-news"], + "rx": ["Qwantbot\\/(\\d+\\.\\d+)[_;\\s]"], + "url": "https://help.qwant.com/bot/" + }, + {"id": "yandex", + "n": "Yandex Bots", "r": ["YandexBot", "YandexAdNet", "YandexBlogs", "YandexImages", "YandexImageResizer", "YandexMarket", "YandexMedia", "YandexOntoDB", "YandexSitelinks","YandexSpravBot", "YandexVertis", "YandexVerticals", "YandexVideo", "YandexWebmaster", "YandexComBot"], "rx": ["Yandex\\w+\\/(\\d+\\.\\d+);"], "url": "http://yandex.com/bots" }, {"id": "seznambot", - "n": "SeznamBot (Czech)", + "n": "SeznamBot (CZ)", "r": ["SeznamBot"], "rx": ["SeznamBot\\/(\\d+\\.\\d+);"], "url": "https://o-seznam.cz/napoveda/vyhledavani/en/seznambot-crawler/" + }, + {"id": "ahrefs", + "n": "Ahrefs Bots (SEO/marketing)", + "r": ["AhrefsBot", "AhrefsSiteAudit"], + "rx": ["AhrefsBot\\/(\\d+\\.\\d+);", "AhrefsSiteAudit\\/(\\d+\\.\\d+);"], + "url": "https://ahrefs.com/robot/" + }, + {"id": "ccbot", + "n": "Common Crawl Bot (AI-Scraper)", + "r": ["CCBot"], + "rx": ["CCBot\\/(\\d+\\.\\d+)[\\s\\.;]*"], + "url": "https://commoncrawl.org/bot.html" + }, + {"id": "mjbot", + "n": "Majestic Crawler (UK)", + "r": ["MJ12bot"], + "rx": ["MJ12bot\\/v?(\\d+\\.\\d+)[\\s\\.;]"], + "url": "http://www.majestic12.co.uk/bot.php" + }, + {"id": 
"petal", + "n": "PetalSearch Bot (China)", + "r": ["PetalBot", "AspiegelBot"], + "rx": ["[\\s;]PetalBot[\\s\\/;]", "AspiegelBot[\\)$]"], + "url": "https://webmaster.petalsearch.com/site/petalbot" + }, + {"id": "barkrowler", + "n": "Barkrowler (Babbar Bot)", + "r": ["Barkrowler"], + "rx": ["[\\s;^]Barkrowler\\/(\\d+\\.\\d+)?"], + "url": "https://babbar.tech/crawler" + }, + {"id": "semrush", + "n": "SemrushBot", + "r": ["SemrushBot"], + "rx": ["SemrushBot-?[A-Z]*\\/(\\d+\\.?\\d*)[;~\\s]"], + "url": "http://www.semrush.com/bot.html" + }, + {"id": "bytespider", + "n": "Bytespider (ByteDance, TikTok)", + "r": ["Bytespider"], + "rx": ["Bytespider[;$]"], + "url": "https://darkvisitors.com/agents/bytespider" + }, + {"id": "dfseo", + "n": "DataForSEO Bot", + "r": ["DataForSeoBot"], + "rx": ["DataForSeoBot[;\\/](\\d\\.\\d)?"], + "url": "https://dataforseo.com/dataforseo-bot" + }, + {"id": "hunter", + "n": "Hunter (Velen) Crawler", + "r": ["VelenPublicWebCrawler"], + "rx": ["VelenPublicWebCrawler\\/(\\d\\.\\d)"], + "url": "https://velen.io" + }, + {"id": "serpstat", + "n": "Serpstat Bot", + "r": ["serpstatbot"], + "rx": ["serpstatbot\\/(\\d\\.\\d)\\s"], + "url": "https://serpstatbot.com/" + }, + {"id": "netestate", + "n": "netEstate NE Crawler (DE)", + "r": ["netEstate NE Crawler"], + "rx": ["netEstate NE Crawler\\s"], + "url": "http://www.website-datenbank.de/" + }, + {"id": "imagesift", + "n": "Hive Imagesift Bot (AI Scraper)", + "r": ["ImagesiftBot"], + "rx": ["ImagesiftBot[;\\s\\/]"], + "url": "https://darkvisitors.com/agents/imagesiftbot" + }, + {"id": "dotbot", + "n": "Moz.com DotBot Crawler", + "r": ["ImagesiftBot"], + "rx": ["DotBot\\/(\\d\\.\\d)[\\.;\\s]"], + "url": "https://moz.com/help/moz-procedures/crawlers/dotbot" + }, + {"id": "maui", + "n": "MauiBot (unknown developer)", + "r": ["MauiBot"], + "rx": ["MauiBot\\s"], + "url": "https://darkvisitors.com/agents/mauibot" + }, + {"id": "plagaware", + "n": "PlagAwareBot (DE)", + "r": ["PlagAwareBot"], + "rx": 
["PlagAwareBot\\/(\\d\\.\\d)"], + "url": "https://www.plagaware.com/bot" } ] \ No newline at end of file diff --git a/data/known-platforms.json b/data/known-platforms.json index 3619278..0fa9e31 100644 --- a/data/known-platforms.json +++ b/data/known-platforms.json @@ -21,7 +21,7 @@ }, {"n": "Old MacOS", "id": "macosold", - "rx": [ "\\sMac OS X 10[\\._](\\d|1[0-3])[\\._;\\s\\)]" ] + "rx": [ "\\sMac OS X 10[\\._](\\d|1[0-3])[\\._;\\s\\)]", "\\sMac OS X (1[12])[\\._]" ] }, {"n": "MacOS", "id": "macos", diff --git a/data/rules.json b/data/rules.json index d5d97a5..dd9b4ee 100644 --- a/data/rules.json +++ b/data/rules.json @@ -1,24 +1,24 @@ { "threshold": 100, "rules": [ - {"func": "obsoleteClient", + {"func": "obsoleteClient", "params": ["aol", "msie", "chromeold"], "id": "oldClient", "desc": "Obsolete browser version", "bot": 40 }, - {"func": "obsoletePlatform", + {"func": "obsoletePlatform", "params": ["winold", "macosold"], "id": "oldOS", "desc": "Obsolete platform version", "bot": 40 }, {"func": "noJavaScript", "id": "noJS", "desc": "JavaScript disabled", - "bot": 20 + "bot": 40 }, {"func": "smallPageCount", "params": [1], - "id": "onePage", "desc": "Views only a single page", - "bot": 20 + "id": "onePage", "desc": "Visitor loaded only a single page", + "bot": 40 }, {"func": "noTicks", - "id": "noTicks", "desc": "Visitor did not spend time reading any page", + "id": "noTicks", "desc": "Visitor did not spend time reading a page", "bot": 10 }, {"func": "noReferences", diff --git a/img/plagaware.png b/img/plagaware.png new file mode 100644 index 0000000..55f4ce7 Binary files /dev/null and b/img/plagaware.png differ diff --git a/img/yahoo.svg b/img/yahoo.svg new file mode 100644 index 0000000..488505e --- /dev/null +++ b/img/yahoo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/script.js b/script.js index 682f88c..553dc27 100644 --- a/script.js +++ b/script.js @@ -226,7 +226,7 @@ BotMon.live = { if ( v.id == visitor.id) { /* match the pre-defined IDs */ 
return v; } else if (v.ip == visitor.ip && v.agent == visitor.agent) { - console.info("Visitor ID not found, using matchin IP + User-Agent instead."); + console.warn("Visitor ID not found, using matching IP + User-Agent instead."); return v; } @@ -683,7 +683,6 @@ BotMon.live = { const json = await response.json(); if (json.rules) { - console.log(json.rules); this._rulesList = json.rules; } @@ -759,19 +758,17 @@ BotMon.live = { func: { // check if client is one of the obsolete ones: - obsoleteClient: function(visitor) { + obsoleteClient: function(visitor, ...clients) { - const obsClients = ['aol', 'msie', 'chromeold']; const clientId = ( visitor._client ? visitor._client.id : ''); - return obsClients.includes(clientId); + return clients.includes(clientId); }, // check if OS/Platform is one of the obsolete ones: - obsoletePlatform: function(visitor) { + obsoletePlatform: function(visitor, ...platforms) { - const obsPlatforms = ['winold', 'macosold']; - const platformId = ( visitor._platform ? visitor._platform.id : ''); - return obsPlatforms.includes(platformId); + const pId = ( visitor._platform ? visitor._platform.id : ''); + return platforms.includes(pId); }, // client does not use JavaScript: @@ -799,7 +796,7 @@ BotMon.live = { }, loadLogFile: async function(type, onLoaded = undefined) { - console.info('BotMon.live.data.loadLogFile(',type,')'); + //console.info('BotMon.live.data.loadLogFile(',type,')'); let typeName = ''; let columns = []; @@ -911,7 +908,7 @@ BotMon.live = {
Bounce rate (est.):${bounceRate}%
-
Bots vs. Humans
+
Bots vs. Humans (page views)
Registered users:${data.bots.users}
Probably humans:${data.bots.human}
Suspected bots:${data.bots.suspected}
@@ -924,7 +921,7 @@ BotMon.live = { // update known bots list: const block = document.getElementById('botmon__botslist'); - block.innerHTML = "
Top known bots
"; + block.innerHTML = "
Top known bots (page views)
"; let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => { return b._pageViews.length - a._pageViews.length; @@ -989,6 +986,9 @@ BotMon.live = { lists: { init: function() { + // function shortcut: + const makeElement = BotMon.t._makeElement; + const parent = document.getElementById('botmon__today__visitorlists'); if (parent) { @@ -1017,15 +1017,16 @@ BotMon.live = { default: console.warn('Unknwon list number.'); } + let group = BotMon.live.data.analytics.groups[listId]; + let gCount = '–' - const details = BotMon.t._makeElement('details', { + const details = makeElement('details', { 'data-group': listId, 'data-loaded': false }); - details.appendChild(BotMon.t._makeElement('summary', - undefined, - listTitle - )); + const title = details.appendChild(makeElement('summary')); + title.appendChild(makeElement('span', {'class':'title'}, listTitle)) + title.appendChild(makeElement('span', {'class':'counter'}, gCount)) details.addEventListener("toggle", this._onDetailsToggle); parent.appendChild(details); @@ -1192,6 +1193,8 @@ BotMon.live = { visitTimeStr = Math.floor(visitDuration / 1000) + "s"; } + console.log(page); + pgLi.appendChild(make('span', {}, page.pg)); // pgLi.appendChild(make('span', {}, page.ref)); pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount)); @@ -1209,7 +1212,6 @@ BotMon.live = { 'class': 'eval' }); data._eval.forEach( (test) => { - console.log(test); const tObj = BotMon.live.data.rules.getRuleInfo(test); const tDesc = tObj ? 
tObj.desc : test; diff --git a/style.less b/style.less index 97254bc..ca458a3 100644 --- a/style.less +++ b/style.less @@ -106,8 +106,13 @@ border-radius: 0 0 .25rem .25rem; } & details summary { - background-color: transparent; - border: transparent none 0; + & { + background-color: transparent; + border: transparent none 0; + } + span.title { + flex-grow: 1; + } } } & > details > summary { @@ -142,7 +147,7 @@ line-height: 1.5; background-color: #F0F0F0; border-bottom: #CCC solid 1px; - border-radius: .5em; + border-radius: .7em; } details ul > li > details > summary > span { @@ -159,6 +164,7 @@ column-gap: .25em; height: 1.5em; overflow: hidden; + margin-right: .2rem; } details ul > li > details > summary > span > span::before { content: ''; @@ -235,10 +241,30 @@ span.bot_googleads::before, dd.bot_googleads::before, span.bot_googleapi::before, dd.bot_googleapi::before { background-image: url('img/google.svg') } span.bot_applebot::before, dd.bot_applebot::before { background-image: url('img/apple.svg') } + span.bot_slurp::before, dd.bot_slurp::before { background-image: url('img/yahoo.svg') } + span.bot_ddg::before, dd.bot_ddg::before { background-image: url('img/ddg.svg') } span.bot_openai::before, dd.bot_openai::before { background-image: url('img/openai.svg') } + span.bot_claude::before, dd.bot_claude::before { background-image: url('img/anthropic.png') } + span.bot_perplexity::before, dd.bot_perplexity::before { background-image: url('img/perplexity.svg') } span.bot_metabots::before, dd.bot_metabots::before { background-image: url('img/meta.svg') } - span.bot_yandexbots::before, dd.bot_yandexbots::before { background-image: url('img/yandex.svg') } + span.bot_qwant::before, dd.bot_qwant::before { background-image: url('img/qwant.svg') } + span.bot_yandex::before, dd.bot_yandex::before { background-image: url('img/yandex.svg') } span.bot_seznambot::before, dd.bot_seznambot::before { background-image: url('img/seznam.svg') } + span.bot_ahrefs::before, 
dd.bot_ahrefs::before { background-image: url('img/ahrefs.png') } + span.bot_ccbot::before, dd.bot_ccbot::before { background-image: url('img/ccbot.png') } + span.bot_mjbot::before, dd.bot_mjbot::before { background-image: url('img/majestic.png') } + span.bot_petal::before, dd.bot_petal::before { background-image: url('img/petal.svg') } + span.bot_barkrowler::before, dd.bot_barkrowler::before { background-image: url('img/babbar.png') } + span.bot_semrush::before, dd.bot_semrush::before { background-image: url('img/semrush.png') } + span.bot_bytespider::before, dd.bot_bytespider::before { background-image: url('img/bytedance.svg') } + span.bot_dfseo::before, dd.bot_dfseo::before { background-image: url('img/dataforseo.png') } + span.bot_hunter::before, dd.bot_hunter::before { background-image: url('img/hunter.png') } + span.bot_serpstat::before, dd.bot_serpstat::before { background-image: url('img/serpstat.svg') } + span.bot_netestate::before, dd.bot_netestate::before { background-image: url('img/netestate.png') } + span.bot_imagesift::before, dd.bot_imagesift::before { background-image: url('img/hive.svg') } + /*span.bot_dotbot::before, dd.bot_dotbot::before { background-image: url('img/moz.svg') }*/ + /*span.bot_maui::before, dd.bot_maui::before { background-image: url('img/maui.svg') }*/ + span.bot_plagaware::before, dd.bot_plagaware::before { background-image: url('img/plagaware.png') } /* user info */ span.user_known::before { background-image: url('img/user.svg') }