From 5526d629716309458f7b9d964116dabb7c0ffb2a Mon Sep 17 00:00:00 2001 From: Sascha Leib Date: Sat, 6 Sep 2025 16:20:58 +0200 Subject: [PATCH] Improved Bot detection --- data/known-platforms.json | 2 +- data/rules.json | 52 ++++++++++- plugin.info.txt | 2 +- script.js | 192 +++++++++++++++++++++++++++++++------- style.less | 2 +- 5 files changed, 212 insertions(+), 38 deletions(-) diff --git a/data/known-platforms.json b/data/known-platforms.json index 0fa9e31..0a810be 100644 --- a/data/known-platforms.json +++ b/data/known-platforms.json @@ -25,7 +25,7 @@ }, {"n": "MacOS", "id": "macos", - "rx": [ "\\sMac OS X 10[\\._](1[4-5])[\\._;\\s\\)]", "\\sMac OS X (2[6-9])[\\._;\\s\\)]" ] + "rx": [ "\\sMac OS X 10[\\._](1[4-5])[\\._;\\s\\)]", "\\sMac OS X (1[4-5])[\\._;\\s\\)]", "\\sMac OS X (2[6-9])[\\._;\\s\\)]" ] }, {"n": "Vintage Windows", "id": "winold", diff --git a/data/rules.json b/data/rules.json index dd9b4ee..2c049d4 100644 --- a/data/rules.json +++ b/data/rules.json @@ -1,6 +1,10 @@ { "threshold": 100, "rules": [ + {"func": "fromKnownBotIP", + "id": "botIpRange", "desc": "Common Bot IP range", + "bot": 50 + }, {"func": "obsoleteClient", "params": ["aol", "msie", "chromeold"], "id": "oldClient", "desc": "Obsolete browser version", "bot": 40 @@ -14,16 +18,58 @@ "bot": 40 }, {"func": "smallPageCount", "params": [1], - "id": "onePage", "desc": "Visiter loaded only a single page", + "id": "onePage", "desc": "Visiter viewed only a single page", "bot": 40 }, {"func": "noTicks", "id": "noTicks", "desc": "Visitor did not spend time reading a page", "bot": 10 }, - {"func": "noReferences", - "id": "noRefs", "desc": "No reference field", + {"func": "noReferrer", + "id": "noRefs", "desc": "No referer field", "bot": 30 + }, + {"func": "clientTest", "params": ["brave"], + "id": "susClient", "desc": "Client identifier that is popular with bot networks", + "bot": 10 + }, + {"func": "combTest", "params": [["macos", "chrome"]], + "id": "unusualPC", "desc": "Unusual combination of platform and client", + "bot": 10 + }, + {"func": "combTest", "params": [["macos", "chromeold"],["macosold", "brave"],["winold", "edge"],["winold", "brave"]], + "id": "suspPC", "desc": "Suspicious combination of platform and client", + "bot": 30 + }, + {"func": "combTest", "params": [["macos", "msie"], ["win10", "safari"]], + "id": "impPC", "desc": "Impossible combination of platform and client", + "bot": 100 } + ], + "ipRanges": [ + {"from": "8.127.0.0", "to": "8.223.255.255", "isp": "alibaba", "loc":"cn"}, + {"from": "24.240.0.0", "to": "24.243.255.255", "isp": "charter", "loc":"us"}, + {"from": "27.106.0.0", "to": "27.106.127.255", "isp": "huawei", "loc":"hk"}, + {"from": "45.224.0.0", "to": "45.227.195.255", "isp": "various", "loc": "br"}, + {"from": "46.250.160.0", "to": "46.250.191.255", "isp": "huawei", "loc":"mx"}, + {"from": "94.74.64.0", "to": "94.74.127.255", "isp": "huawei", "loc":"hk"}, + {"from": "101.0.0.0", "to": "119.255.255.255", "isp": "sinnet", "loc":"cn"}, + {"from": "111.119.192.0", "to": "111.119.255.255", "isp": "huawei", "loc":"sg"}, + {"from": "119.0.0.0", "to": "101.207.255.255", "isp": "unicom", "loc":"cn"}, + {"from": "121.91.168.0", "to": "121.91.175.255", "isp": "huawei", "loc":"hk"}, + {"from": "122.8.0.0", "to": "122.8.255.255", "isp": "cnisp", "loc":"cn"}, + {"from": "122.9.0.0", "to": "122.9.255.255", "isp": "huawei", "loc":"cn"}, + {"from": "124.243.128.0", "to": "124.243.191.255", "isp": "huawei", "loc":"sg"}, + {"from": "150.40.128.0", "to": "150.40.255.255", "isp": "huawei", "loc":"hk"}, + {"from": "159.138.0.0", "to": "159.138.225.255", "isp": "huawei", "loc":"th"}, + {"from": "166.108.192.0", "to": "166.108.255.255", "isp": "huawei", "loc":"sg"}, + {"from": "177.0.0.0", "to": "177.223.255.255", "isp": "ths", "loc":"br"}, + {"from": "183.87.32.0", "to": "183.87.159.255", "isp": "huawei", "loc":"hk"}, + {"from": "187.180.0.0", "to": "187.183.255.255", "isp": "claro", "loc":"br"}, + {"from": "188.239.0.0", "to": "188.239.63.255", "isp": "huawei", "loc":"sg"}, + {"from": "189.1.192.0", "to": "189.1.255.255", "isp": "huawei", "loc":"hk"}, + {"from": "189.76.0.0", "to": "189.127.255.255", "isp": "hardonline", "loc":"br"}, + {"from": "190.92.192.0", "to": "190.92.255.255", "isp": "huawei", "loc":"hk"}, + {"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "inspire", "loc":"br"} ] } \ No newline at end of file diff --git a/plugin.info.txt b/plugin.info.txt index 3052bd0..1505946 100644 --- a/plugin.info.txt +++ b/plugin.info.txt @@ -1,7 +1,7 @@ base botmon author Sascha Leib email ad@hominem.com -date 2025-09-05 +date 2025-09-06 name Bot Monitoring desc Live monitoring of bot traffic on your DokuWiki instance (under development) url https://www.dokuwiki.org/plugin:botmon diff --git a/script.js b/script.js index 8dad6bd..3586f5b 100644 --- a/script.js +++ b/script.js @@ -1,7 +1,7 @@ "use strict"; /* DokuWiki BotMon Plugin Script file */ -/* 04.09.2025 - 0.1.8 - pre-release */ -/* Authors: Sascha Leib */ +/* 06.09.2025 - 0.2.0 - beta */ +/* Author: Sascha Leib */ // enumeration of user types: const BM_USERTYPE = Object.freeze({ @@ -39,7 +39,6 @@ const BotMon = { /* internal tools */ t: { - /* helper function to call inits of sub-objects */ _callInit: function(obj) { //console.info('BotMon.t._callInit(obj=',obj,')'); @@ -92,6 +91,15 @@ const BotMon = { console.error(e); } return r; + }, + + /* helper to convert an ip address string to a normalised format: */ + _ip2Num: function(ip) { + if (ip.indexOf(':') > 0) { /* IP6 */ + return (ip.split(':').map(d => ('0000'+d).slice(-4) ).join('')); + } else { /* IP4 */ + return Number(ip.split('.').map(d => ('000'+d).slice(-3) ).join('')); + } } } }; @@ -126,6 +134,9 @@ BotMon.live = { // set the flags: switch(file) { + case 'rules': + data._dispatchRulesLoaded = true; + break; case 'bots': data._dispatchBotsLoaded = true; break; @@ -135,9 +146,6 @@ BotMon.live = { case 'platforms': data._dispatchPlatformsLoaded = true; break; - case 'rules': - data._dispatchRulesLoaded = true; - break; default: // ignore } @@ -226,7 +234,7 @@ BotMon.live = { if ( v.id == visitor.id) { /* match the pre-defined IDs */ return v; } else if (v.ip == visitor.ip && v.agent == visitor.agent) { - console.warn("Visitor ID not found, using matchin IP + User-Agent instead."); + console.warn(`Visitor ID “${v.id}” not found, using matchin IP + User-Agent instead.`); return v; } @@ -353,7 +361,7 @@ BotMon.live = { // find the visit info: let visitor = model.findVisitor(dat); if (!visitor) { - console.warn(`No visitor with ID ${dat.id}, registering a new one.`); + console.info(`No visitor with ID “${dat.id}” found, registering as a new one.`); visitor = model.registerVisit(dat, type); } if (visitor) { @@ -379,11 +387,20 @@ BotMon.live = { // helper function to create a new "page view" item: _makePageView: function(data, type) { + + // try to parse the referrer: + let rUrl = null; + try { + rUrl = ( data.ref && data.ref !== '' ? new URL(data.ref) : null ); + } catch (e) { + console.info(`Invalid referer: “${data.ref}”.`); + } + return { _by: type, ip: data.ip, pg: data.pg, - ref: data.ref || '', + _ref: rUrl, _firstSeen: data.ts, _lastSeen: data.ts, _seenBy: [type], @@ -690,6 +707,22 @@ BotMon.live = { this._threshold = json.threshold; } + if (json.ipRanges) { + // clean up the IPs first: + let list = []; + json.ipRanges.forEach( it => { + let item = { + 'from': BotMon.t._ip2Num(it.from), + 'to': BotMon.t._ip2Num(it.to), + 'isp': it.isp, + 'loc': it.loc + }; + list.push(item); + }); + + this._botIPs = list; + } + this._ready = true; } catch (error) { @@ -773,7 +806,9 @@ BotMon.live = { // client does not use JavaScript: noJavaScript: function(visitor) { - return (visitor._jsClient === false); + + return !(visitor._seenBy.includes('log') || visitor._seenBy.includes('tck')); + }, // are there at lest num pages loaded? @@ -787,10 +822,75 @@ BotMon.live = { return !visitor._seenBy.includes('tck'); }, - // there are no references in any of the page visits: - noReferences: function(visitor) { - return (visitor._hasReferrer === true); + // there are no referrers in any of the page visits: + noReferrer: function(visitor) { + + let r = false; // return value + for (let i = 0; i < visitor._pageViews.length; i++) { + if (!visitor._pageViews[i]._ref) { + r = true; + break; + } + } + return r; + }, + + // test for specific client identifiers: + clientTest: function(visitor, ...list) { + + for (let i=0; i= ipRange.from && ipNum <= ipRange.to) { + return ipRange; + } + + }; + return null; + } }, @@ -1066,6 +1166,8 @@ BotMon.live = { const make = BotMon.t._makeElement; let ipType = ( data.ip.indexOf(':') >= 0 ? '6' : '4' ); + const platformName = (data._platform ? data._platform.n : 'Unknown'); + const clientName = (data._client ? data._client.n: 'Unknown'); const li = make('li'); // root list item const details = make('details'); @@ -1073,10 +1175,6 @@ BotMon.live = { details.appendChild(summary); const span1 = make('span'); /* left-hand group */ - - const platformName = (data._platform ? data._platform.n : 'Unknown'); - const clientName = (data._client ? data._client.n: 'Unknown'); - if (data._type == BM_USERTYPE.KNOWN_BOT) { /* Bot only */ const botName = ( data._bot && data._bot.n ? data._bot.n : "Unknown"); @@ -1123,7 +1221,22 @@ BotMon.live = { summary.appendChild(span2); - // create expanable section: + // add details expandable section: + details.appendChild(BotMon.live.gui.lists._makeVisitorDetails(data, type)); + + li.appendChild(details); + return li; + }, + + _makeVisitorDetails: function(data, type) { + + // shortcut for neater code: + const make = BotMon.t._makeElement; + + let ipType = ( data.ip.indexOf(':') >= 0 ? '6' : '4' ); + if (data.ip == '127.0.0.1' || data.ip == '::1' ) ipType = '0'; + const platformName = (data._platform ? data._platform.n : 'Unknown'); + const clientName = (data._client ? data._client.n: 'Unknown'); const dl = make('dl', {'class': 'visitor_details'}); @@ -1171,7 +1284,7 @@ BotMon.live = { } dl.appendChild(make('dt', {}, "User-Agent:")); - dl.appendChild(make('dd', {'class': 'agent' + ipType}, data.agent)); + dl.appendChild(make('dd', {'class': 'agent'}, data.agent)); dl.appendChild(make('dt', {}, "Visitor Type:")); dl.appendChild(make('dd', undefined, data._type )); @@ -1182,6 +1295,8 @@ BotMon.live = { dl.appendChild(make('dt', {}, "Visited pages:")); const pagesDd = make('dd', {'class': 'pages'}); const pageList = make('ul'); + + /* list all page views */ data._pageViews.forEach( (page) => { const pgLi = make('li'); @@ -1191,10 +1306,16 @@ BotMon.live = { visitTimeStr = Math.floor(visitDuration / 1000) + "s"; } - console.log(page); - - pgLi.appendChild(make('span', {}, page.pg)); - // pgLi.appendChild(make('span', {}, page.ref)); + pgLi.appendChild(make('span', {}, page.pg)); /* DW Page ID */ + if (page._ref) { + pgLi.appendChild(make('span', { + 'data-ref': page._ref.host, + 'title': "Referrer: " + page._ref.full + }, page._ref.site)); + } else { + pgLi.appendChild(make('span', { + }, "No referer")); + } pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount)); pgLi.appendChild(make('span', {}, page._firstSeen.toLocaleString())); pgLi.appendChild(make('span', {}, page._lastSeen.toLocaleString())); @@ -1203,25 +1324,36 @@ BotMon.live = { pagesDd.appendChild(pageList); dl.appendChild(pagesDd); + /* add bot evaluation: */ if (data._eval) { dl.appendChild(make('dt', {}, "Evaluation:")); const evalDd = make('dd'); const testList = make('ul',{ 'class': 'eval' }); - data._eval.forEach( (test) => { + data._eval.forEach( test => { const tObj = BotMon.live.data.rules.getRuleInfo(test); - const tDesc = tObj ? tObj.desc : test; + let tDesc = tObj ? tObj.desc : test; + // special case for Bot IP range test: + if (tObj.func == 'fromKnownBotIP') { + const rangeInfo = BotMon.live.data.rules.getBotIPInfo(data.ip); + if (rangeInfo) { + tDesc += ` (${rangeInfo.isp}, ${rangeInfo.loc.toUpperCase()})`; + } + } + + // create the entry field const tstLi = make('li'); tstLi.appendChild(make('span', { - 'class': 'test test_' . test - }, ( tObj ? tObj.desc : test ))); + 'data-testid': test + }, tDesc)); tstLi.appendChild(make('span', {}, ( tObj ? tObj.bot : '—') )); testList.appendChild(tstLi); }); + // add total row const tst2Li = make('li', { 'class': 'total' }); @@ -1232,11 +1364,7 @@ BotMon.live = { evalDd.appendChild(testList); dl.appendChild(evalDd); } - - details.appendChild(dl); - - li.appendChild(details); - return li; + return dl; } } diff --git a/style.less b/style.less index ca458a3..59e56a8 100644 --- a/style.less +++ b/style.less @@ -399,7 +399,7 @@ ul.eval { li:nth-child(odd) { - background-color: #202022; + background-color: #333337; } li.total { border-top-color: #CCC;