From 43b05c9311d8e10461f9f14fddab69b8225ef770 Mon Sep 17 00:00:00 2001 From: Sascha Leib Date: Tue, 9 Sep 2025 09:36:13 +0200 Subject: [PATCH] More bot detection criteria --- data/known-bots.json | 6 ++++ data/rules.json | 66 +++++++++++++++++++++++--------------------- script.js | 9 ++++++ 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/data/known-bots.json b/data/known-bots.json index 228d576..6576ea8 100644 --- a/data/known-bots.json +++ b/data/known-bots.json @@ -201,5 +201,11 @@ "r": ["StartmeBot"], "rx": ["StartmeBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"], "url": "https://support.start.me/en/articles/9182933-what-is-startmebot" + }, + {"id": "bitsight", + "n": "BitSightBot", + "r": ["BitSightBot"], + "rx": ["BitSightBot\\/(\\d+\\.\\d+)"], + "url": "https://darkvisitors.com/agents/bitsightbot" } ] \ No newline at end of file diff --git a/data/rules.json b/data/rules.json index 57582f1..27dc207 100644 --- a/data/rules.json +++ b/data/rules.json @@ -47,49 +47,51 @@ }, {"func": "combinationTest", "params": [["macos", "msie"], ["win10", "safari"]], "id": "impPC", "desc": "Impossible combination of platform and client", - "bot": 100 + "bot": 80 }, {"func": "loadSpeed", "params": [3, 20], "id": "speedRun", "desc": "Average time between page loads is less than 20 seconds", - "bot": 100 + "bot": 80 + }, + {"func": "noAcceptLang", + "id": "noAcc", "desc": "Client has no, or an empty, “Accept-Language” header", + "bot": 40 } ], "ipRanges": [ - {"from": "3...", "to": "3.255.255.255", "label": "Amazon Data Services [US]"}, - {"from": "8.127..", "to": "8.223.255.254", "label": "Alibaba [CN]"}, - {"from": "24.240..", "to": "24.243.255.254", "label": "Charter [US]"}, - {"from": "27.106..", "to": "27.106.127.254", "label": "Huawei [US]"}, - {"from": "34...", "to": "34.191.255.254", "label": "Google LLC"}, - {"from": "45.131.216.", "to": "45.131.219.254", "label": "Lonconnect [UK]"}, - {"from": "45.184.16.1.", "to": "45.184.31.254", "label": "Orbi Telecom [BR]"}, - {"from": "45.224..", "to": "45.227.195.254", "label": "Various BR ISPs [BR]"}, - {"from": "46.250.160.", "to": "46.250.191.254", "label": "Huawei [MX]"}, - {"from": "49.0.200.", "to": "49.0.255.254", "label": "Huawei [SG]"}, + {"from": "3.0.0.0", "to": "3.255.255.255", "label": "Amazon Data Services [US]"}, + {"from": "8.127.0.0", "to": "8.223.255.254", "label": "Alibaba [CN]"}, + {"from": "24.240.0.0", "to": "24.243.255.254", "label": "Charter [US]"}, + {"from": "27.106.0.0", "to": "27.106.127.254", "label": "Huawei [US]"}, + {"from": "34.0.0.0", "to": "34.191.255.254", "label": "Google LLC"}, + {"from": "45.0.0.0", "to": "45.255.255.254", "label": "Various small ISPs, mostly BR"}, + {"from": "46.250.160.0", "to": "46.250.191.254", "label": "Huawei [MX]"}, + {"from": "49.0.200.0", "to": "49.0.255.254", "label": "Huawei [SG]"}, {"from": "84.37.35.0", "to": "84.37.255.254", "label": "GTT.net [US]"}, {"from": "94.74.64.0", "to": "94.74.127.254", "label": "Huawei [HK]"}, - {"from": "101...", "to": "119.255.255.254", "label": "SinNet [CN]"}, - {"from": "111.119.192.", "to": "111.119.255.254", "label": "Huawei [SG]"}, - {"from": "119...", "to": "101.207.255.254", "label": "Unicom [CN]"}, + {"from": "101.0.0.0", "to": "101.255.255.254", "label": "ChinaNet [CN]"}, + {"from": "110.238.96.0", "to": "110.238.127.255", "label": "Huawei [SG]"}, + {"from": "111.119.192.0", "to": "111.119.255.255", "label": "Huawei [SG]"}, + {"from": "119.0.0.0", "to": "101.207.255.254", "label": "Unicom [CN]"}, {"from": "121.91.168.", "to": "121.91.175.254", "label": "Huawei [HK]"}, - {"from": "122.8..", "to": "122.8.255.254", "label": "CN-ISP [CN]"}, - {"from": "122.9..", "to": "122.9.255.254", "label": "Huawei [CN]"}, - {"from": "124.243.128.", "to": "124.243.191.254", "label": "Huawei [SG]"}, - {"from": "142.147.128.", "to": "1142.147.255.254", "label": "Web2Objects LLC [US]"}, - {"from": "150.40.128.", "to": "150.40.255.254", "label": "Huawei [HK]"}, - {"from": "159.138..", "to": "159.138.225.254", "label": "Huawei [TH]"}, - {"from": "162.128..", "to": "162.128.255.254", "label": "Zenlayer [SG]"}, - {"from": "166.108.192.", "to": "166.108.255.254", "label": "Huawei [SG]"}, - {"from": "177...", "to": "177.223.255.254", "label": "THS [BR]"}, - {"from": "183.87.32.", "to": "183.87.159.254", "label": "Huawei [HK]"}, - {"from": "187.180..", "to": "187.183.255.254", "label": "Claro [BR]"}, - {"from": "188.239..", "to": "188.239.63.254", "label": "Huawei [SG]"}, - {"from": "189.1.192.", "to": "189.1.255.254", "label": "Huawei [HK]"}, - {"from": "189.76..", "to": "189.127.255.254", "label": "Hard Online [BR]"}, - {"from": "190.92.192.", "to": "190.92.255.254", "label": "Huawei [HK]"}, - {"from": "192.124.170.", "to": "192.124.182.254", "label": "Relcom [CZ]"}, + {"from": "122.8.0.0", "to": "122.8.255.254", "label": "CN-ISP [CN]"}, + {"from": "122.9.0.0", "to": "122.9.255.254", "label": "Huawei [CN]"}, + {"from": "124.243.128.0", "to": "124.243.191.254", "label": "Huawei [SG]"}, + {"from": "142.147.128.0", "to": "1142.147.255.254", "label": "Web2Objects LLC [US]"}, + {"from": "150.40.128.0", "to": "150.40.255.254", "label": "Huawei [HK]"}, + {"from": "159.138.0.0", "to": "159.138.225.254", "label": "Huawei [TH]"}, + {"from": "162.128.0.0", "to": "162.128.255.254", "label": "Zenlayer [SG]"}, + {"from": "166.108.192.0", "to": "166.108.255.254", "label": "Huawei [SG]"}, + {"from": "177.0.0.0", "to": "177.255.255.255", "label": "BrasilNET [BR]"}, + {"from": "183.87.32.0", "to": "183.87.159.254", "label": "Huawei [HK]"}, + {"from": "187.180.0.0", "to": "187.183.255.254", "label": "Claro [BR]"}, + {"from": "188.239.0.0", "to": "188.239.63.254", "label": "Huawei [SG]"}, + {"from": "189.127.255.255", "to": "189.127.255.255", "label": "Huawei BR/HK"}, + {"from": "190.92.192.0", "to": "190.92.255.254", "label": "Huawei [HK]"}, + {"from": "192.124.170.0", "to": "192.124.182.254", "label": "Relcom [CZ]"}, {"from": "2001:4800::::::", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Rackspace/Google [US]"}, {"from": "2600:1f00::::::", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Amazon Cloud [US]"}, {"from": "2804:::::::", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Inspire [BR]"}, - {"from": "2a0a:4cc0:0:0:0:0:0:0", "to": "2a0a:4cc0:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Netcup [DE]"} + {"from": "2a0a:4cc0::::::", "to": "2a0a:4cc0:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Netcup [DE]"} ] } \ No newline at end of file diff --git a/script.js b/script.js index e259338..75fa890 100644 --- a/script.js +++ b/script.js @@ -990,6 +990,15 @@ BotMon.live = { return false; }, + // Is there an accept-language field defined at all? + noAcceptLang: function(visitor) { + + if (!visitor.accept || visitor.accept.length <= 0) { // no accept-languages header + return true; + } + // TODO: parametrize this! + return false; + }, // At least x page views were recorded, but they come within less than y seconds loadSpeed: function(visitor, minItems, maxTime) {