diff --git a/data/known-bots.json b/data/known-bots.json index e3d9ec0..2f0c760 100644 --- a/data/known-bots.json +++ b/data/known-bots.json @@ -1,24 +1,23 @@ -[ - {"id": "bingbot", - "n": "Bingbot", +[ {"id": "bingbot", + "n": "Bing Bot", "r": ["bingbot"], "rx": ["\\sbingbot\\/(\\d+\\.\\d+);"], "url": "http://www.bing.com/bingbot.htm" }, {"id": "googlebot", - "n": "GoogleBot", + "n": "Google Bot", "r": ["Googlebot"], "rx": ["\\sGooglebot\\/(\\d+\\.\\d+);"], "url": "http://www.google.com/bot.html" }, {"id": "googleads", - "n": "Google AdsBot", + "n": "Google Ads Bot", "r": ["AdsBot-Google", "AdsBot-Google-Mobile", "Mediapartners-Google"], "rx": ["AdsBot-Google;","AdsBot-Google-Mobile;", "Mediapartners-Google\\/(\\d+\\.\\d+);"], "url": "http://www.google.com/mobile/adsbot.html" }, {"id": "googleapi", - "n": "Google APIs", + "n": "Google API Crawler", "r": ["APIs-Google"], "rx": ["APIs-Google"], "url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers" @@ -29,28 +28,148 @@ "rx": ["Applebot\\/(\\d+\\.\\d+);"], "url": "http://www.apple.com/go/applebot" }, + {"id": "slurp", + "n": "Slurp (Yahoo!)", + "r": ["Slurp"], + "rx": ["Slurp[\\s;\\)]"], + "url": "http://help.yahoo.com/help/us/ysearch/slurp" + }, + {"id": "ddg", + "n": "DuckDuckGo Bots", + "r": ["DuckDuckBot","DuckAssistBot","DuckDuckGo-Favicons-Bot"], + "rx": ["DuckDuckBot\\/(\\d+\\.\\d+);", "DuckAssistBot\\/(\\d+\\.\\d+);", "DuckDuckGo-Favicons-Bot\\/(\\d+\\.\\d+);"], + "url": "https://duckduckgo.com/duckduckbot.html" + }, {"id": "openai", - "n": "OpenAI/ChatGPT", + "n": "OpenAI/ChatGPT Bots", "r": ["OAI-SearchBot", "ChatGPT-User", "GPTBot"], "rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "ChatGPT-User\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"], "url": "https://platform.openai.com/docs/bots/" }, + {"id": "claude", + "n": "Anthropic Claude Bots", + "r": ["ClaudeBot", "Claude-User", "Claude-SearchBot"], + "rx": ["ClaudeBot\\/(\\d+\\.\\d+);"], + "url": 
"https://darkvisitors.com/agents/claudebot" + }, + {"id": "perplexity", + "n": "Perplexity Crawlers", + "r": ["PerplexityBot", "Perplexity-User"], + "rx": ["PerplexityBot\\/(\\d+\\.\\d+);", "Perplexity-User\\/(\\d+\\.\\d+);"], + "url": "https://perplexity.ai/perplexitybot" + }, {"id": "metabots", - "n": "Meta/Facebook", + "n": "Meta/Facebook Bots", "r": ["facebookexternalhit", "facebookcatalog","meta-webindexer","meta-externalads","meta-externalagent","meta-externalfetcher"], "rx": ["facebook\\w+\\/(\\d+\\.\\d+)", "meta-\\w+\\/(\\d+\\.\\d+)"], "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler" }, - {"id": "yandexbots", - "n": "Yandex (Russia)", + {"id": "qwant", + "n": "Qwant Web crawler", + "r": ["Qwantbot", "Qwantbot-news"], + "rx": ["Qwantbot\\/(\\d+\\.\\d+)[_;\\s]"], + "url": "https://help.qwant.com/bot/" + }, + {"id": "yandex", + "n": "Yandex Bots", "r": ["YandexBot", "YandexAdNet", "YandexBlogs", "YandexImages", "YandexImageResizer", "YandexMarket", "YandexMedia", "YandexOntoDB", "YandexSitelinks","YandexSpravBot", "YandexVertis", "YandexVerticals", "YandexVideo", "YandexWebmaster", "YandexComBot"], "rx": ["Yandex\\w+\\/(\\d+\\.\\d+);"], "url": "http://yandex.com/bots" }, {"id": "seznambot", - "n": "SeznamBot (Czech)", + "n": "SeznamBot (CZ)", "r": ["SeznamBot"], "rx": ["SeznamBot\\/(\\d+\\.\\d+);"], "url": "https://o-seznam.cz/napoveda/vyhledavani/en/seznambot-crawler/" + }, + {"id": "ahrefs", + "n": "Ahrefs Bots (SEO/marketing)", + "r": ["AhrefsBot", "AhrefsSiteAudit"], + "rx": ["AhrefsBot\\/(\\d+\\.\\d+);", "AhrefsSiteAudit\\/(\\d+\\.\\d+);"], + "url": "https://ahrefs.com/robot/" + }, + {"id": "ccbot", + "n": "Common Crawl Bot (AI-Scraper)", + "r": ["CCBot"], + "rx": ["CCBot\\/(\\d+\\.\\d+)[\\s\\.;]*"], + "url": "https://commoncrawl.org/bot.html" + }, + {"id": "mjbot", + "n": "Majestic Crawler (UK)", + "r": ["MJ12bot"], + "rx": ["MJ12bot\\/v?(\\d+\\.\\d+)[\\s\\.;]"], + "url": "http://www.majestic12.co.uk/bot.php" + }, + {"id": 
"petal", + "n": "PetalSearch Bot (China)", + "r": ["PetalBot", "AspiegelBot"], + "rx": ["[\\s;]PetalBot[\\s\\/;]", "AspiegelBot[\\)$]"], + "url": "https://webmaster.petalsearch.com/site/petalbot" + }, + {"id": "barkrowler", + "n": "Barkrowler (Babbar Bot)", + "r": ["Barkrowler"], + "rx": ["[\\s;^]Barkrowler\\/(\\d+\\.\\d+)?"], + "url": "https://babbar.tech/crawler" + }, + {"id": "semrush", + "n": "SemrushBot", + "r": ["SemrushBot"], + "rx": ["SemrushBot-?[A-Z]*\\/(\\d+\\.?\\d*)[;~\\s]"], + "url": "http://www.semrush.com/bot.html" + }, + {"id": "bytespider", + "n": "Bytespider (ByteDance, TikTok)", + "r": ["Bytespider"], + "rx": ["Bytespider[;$]"], + "url": "https://darkvisitors.com/agents/bytespider" + }, + {"id": "dfseo", + "n": "DataForSEO Bot", + "r": ["DataForSeoBot"], + "rx": ["DataForSeoBot[;\\/](\\d\\.\\d)?"], + "url": "https://dataforseo.com/dataforseo-bot" + }, + {"id": "hunter", + "n": "Hunter (Velen) Crawler", + "r": ["VelenPublicWebCrawler"], + "rx": ["VelenPublicWebCrawler\\/(\\d\\.\\d)"], + "url": "https://velen.io" + }, + {"id": "serpstat", + "n": "Serpstat Bot", + "r": ["serpstatbot"], + "rx": ["serpstatbot\\/(\\d\\.\\d)\\s"], + "url": "https://serpstatbot.com/" + }, + {"id": "netestate", + "n": "netEstate NE Crawler (DE)", + "r": ["netEstate NE Crawler"], + "rx": ["netEstate NE Crawler\\s"], + "url": "http://www.website-datenbank.de/" + }, + {"id": "imagesift", + "n": "Hive Imagesift Bot (AI Scraper)", + "r": ["ImagesiftBot"], + "rx": ["ImagesiftBot[;\\s\\/]"], + "url": "https://darkvisitors.com/agents/imagesiftbot" + }, + {"id": "dotbot", + "n": "Moz.com DotBot Crawler", + "r": ["DotBot"], + "rx": ["DotBot\\/(\\d\\.\\d)[\\.;\\s]"], + "url": "https://moz.com/help/moz-procedures/crawlers/dotbot" + }, + {"id": "maui", + "n": "MauiBot (unknown developer)", + "r": ["MauiBot"], + "rx": ["MauiBot\\s"], + "url": "https://darkvisitors.com/agents/mauibot" + }, + {"id": "plagaware", + "n": "PlagAwareBot (DE)", + "r": ["PlagAwareBot"], + "rx": 
["PlagAwareBot\\/(\\d\\.\\d)"], + "url": "https://www.plagaware.com/bot" } ] \ No newline at end of file diff --git a/data/known-platforms.json b/data/known-platforms.json index 3619278..0fa9e31 100644 --- a/data/known-platforms.json +++ b/data/known-platforms.json @@ -21,7 +21,7 @@ }, {"n": "Old MacOS", "id": "macosold", - "rx": [ "\\sMac OS X 10[\\._](\\d|1[0-3])[\\._;\\s\\)]" ] + "rx": [ "\\sMac OS X 10[\\._](\\d|1[0-3])[\\._;\\s\\)]", "\\sMac OS X (1[12])[\\._]" ] }, {"n": "MacOS", "id": "macos", diff --git a/data/rules.json b/data/rules.json index d5d97a5..dd9b4ee 100644 --- a/data/rules.json +++ b/data/rules.json @@ -1,24 +1,24 @@ { "threshold": 100, "rules": [ - {"func": "obsoleteClient", + {"func": "obsoleteClient", "params": ["aol", "msie", "chromeold"], "id": "oldClient", "desc": "Obsolete browser version", "bot": 40 }, - {"func": "obsoletePlatform", + {"func": "obsoletePlatform", "params": ["winold", "macosold"], "id": "oldOS", "desc": "Obsolete platform version", "bot": 40 }, {"func": "noJavaScript", "id": "noJS", "desc": "JavaScript disabled", - "bot": 20 + "bot": 40 }, {"func": "smallPageCount", "params": [1], - "id": "onePage", "desc": "Views only a single page", - "bot": 20 + "id": "onePage", "desc": "Visitor loaded only a single page", + "bot": 40 }, {"func": "noTicks", - "id": "noTicks", "desc": "Visitor did not spend time reading any page", + "id": "noTicks", "desc": "Visitor did not spend time reading a page", "bot": 10 }, {"func": "noReferences", diff --git a/img/plagaware.png b/img/plagaware.png new file mode 100644 index 0000000..55f4ce7 Binary files /dev/null and b/img/plagaware.png differ diff --git a/img/yahoo.svg b/img/yahoo.svg new file mode 100644 index 0000000..488505e --- /dev/null +++ b/img/yahoo.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/script.js b/script.js index 682f88c..553dc27 100644 --- a/script.js +++ b/script.js @@ -226,7 +226,7 @@ BotMon.live = { if ( v.id == visitor.id) { /* match the pre-defined IDs */ 
return v; } else if (v.ip == visitor.ip && v.agent == visitor.agent) { - console.info("Visitor ID not found, using matchin IP + User-Agent instead."); + console.warn("Visitor ID not found, using matching IP + User-Agent instead."); return v; } @@ -683,7 +683,6 @@ BotMon.live = { const json = await response.json(); if (json.rules) { - console.log(json.rules); this._rulesList = json.rules; } @@ -759,19 +758,17 @@ BotMon.live = { func: { // check if client is one of the obsolete ones: - obsoleteClient: function(visitor) { + obsoleteClient: function(visitor, ...clients) { - const obsClients = ['aol', 'msie', 'chromeold']; const clientId = ( visitor._client ? visitor._client.id : ''); - return obsClients.includes(clientId); + return clients.includes(clientId); }, // check if OS/Platform is one of the obsolete ones: - obsoletePlatform: function(visitor) { + obsoletePlatform: function(visitor, ...platforms) { - const obsPlatforms = ['winold', 'macosold']; - const platformId = ( visitor._platform ? visitor._platform.id : ''); - return obsPlatforms.includes(platformId); + const pId = ( visitor._platform ? visitor._platform.id : ''); + return platforms.includes(pId); }, // client does not use JavaScript: @@ -799,7 +796,7 @@ BotMon.live = { }, loadLogFile: async function(type, onLoaded = undefined) { - console.info('BotMon.live.data.loadLogFile(',type,')'); + //console.info('BotMon.live.data.loadLogFile(',type,')'); let typeName = ''; let columns = []; @@ -911,7 +908,7 @@ BotMon.live = {