diff --git a/cleanup.php b/cleanup.php index ea21193..ea97973 100644 --- a/cleanup.php +++ b/cleanup.php @@ -1,27 +1,25 @@ -

BotMon Cleanup Script

- +} \ No newline at end of file diff --git a/data/known-bots.json b/data/known-bots.json index b886df4..2ce7df2 100644 --- a/data/known-bots.json +++ b/data/known-bots.json @@ -7,7 +7,7 @@ {"id": "googlebot", "n": "Google Bot", "r": ["Googlebot"], - "rx": ["\\sGooglebot\\/(\\d+\\.\\d+);"], + "rx": ["Googlebot\\/(\\d+\\.\\d+)", "Googlebot-Image\\/(\\d+\\.\\d+)"], "url": "http://www.google.com/bot.html" }, {"id": "googleads", diff --git a/data/known-clients.json b/data/known-clients.json index ef40b61..5663613 100644 --- a/data/known-clients.json +++ b/data/known-clients.json @@ -41,7 +41,7 @@ }, {"n": "Edge", "id": "msedge", - "rx": [ "\\sEdg\\/(\\d+)\\.", "\\sEdge\\/(\\d+)" ] + "rx": [ "\\sEdg\\/(\\d+)", "\\sEdge\\/(\\d+)", "EdgiOS\\/(\\d+)" ] }, {"n": "Old Chrome", "id": "chromeold", diff --git a/data/rules.json b/data/rules.json index 8ddb9cf..6124bcb 100644 --- a/data/rules.json +++ b/data/rules.json @@ -13,16 +13,16 @@ "id": "oldOS", "desc": "Obsolete platform version", "bot": 40 }, - {"func": "noJavaScript", - "id": "noJS", "desc": "JavaScript disabled", - "bot": 40 - }, {"func": "smallPageCount", "params": [1], "id": "onePage", "desc": "Visiter viewed only a single page", "bot": 40 }, - {"func": "noTicks", - "id": "noTicks", "desc": "Visitor did not spend time reading a page", + {"func": "noRecord", "params": ["log"], + "id": "noClient", "desc": "No client-side JS log was recorded", + "bot": 50 + }, + {"func": "noRecord", "params": ["tck"], + "id": "noTicks", "desc": "No client ticks were recorded", "bot": 10 }, {"func": "noReferrer", @@ -48,33 +48,41 @@ {"func": "combTest", "params": [["macos", "msie"], ["win10", "safari"]], "id": "impPC", "desc": "Impossible combination of platform and client", "bot": 100 + }, + {"func": "loadSpeed", "params": [3, 20], + "id": "speedRun", "desc": "Average time between page loads is less than 20 seconds", + "bot": 100 } ], "ipRanges": [ - {"from": "8.127.0.0", "to": "8.223.255.255", "isp": "alibaba", "loc":"cn"}, - {"from": "24.240.0.0", "to": "24.243.255.255", "isp": "charter", "loc":"us"}, - {"from": "27.106.0.0", "to": "27.106.127.255", "isp": "huawei", "loc":"hk"}, - {"from": "45.224.0.0", "to": "45.227.195.255", "isp": "various", "loc": "br"}, - {"from": "46.250.160.0", "to": "46.250.191.255", "isp": "huawei", "loc":"mx"}, - {"from": "94.74.64.0", "to": "94.74.127.255", "isp": "huawei", "loc":"hk"}, - {"from": "101.0.0.0", "to": "119.255.255.255", "isp": "sinnet", "loc":"cn"}, - {"from": "111.119.192.0", "to": "111.119.255.255", "isp": "huawei", "loc":"sg"}, - {"from": "119.0.0.0", "to": "101.207.255.255", "isp": "unicom", "loc":"cn"}, - {"from": "121.91.168.0", "to": "121.91.175.255", "isp": "huawei", "loc":"hk"}, - {"from": "122.8.0.0", "to": "122.8.255.255", "isp": "cnisp", "loc":"cn"}, - {"from": "122.9.0.0", "to": "122.9.255.255", "isp": "huawei", "loc":"cn"}, - {"from": "124.243.128.0", "to": "124.243.191.255", "isp": "huawei", "loc":"sg"}, - {"from": "150.40.128.0", "to": "150.40.255.255", "isp": "huawei", "loc":"hk"}, - {"from": "159.138.0.0", "to": "159.138.225.255", "isp": "huawei", "loc":"th"}, - {"from": "162.128.0.2505", "to": "162.128.127.255", "isp": "zenlayer", "loc":"sg"}, - {"from": "166.108.192.0", "to": "166.108.255.255", "isp": "huawei", "loc":"sg"}, - {"from": "177.0.0.0", "to": "177.223.255.255", "isp": "ths", "loc":"br"}, - {"from": "183.87.32.0", "to": "183.87.159.255", "isp": "huawei", "loc":"hk"}, - {"from": "187.180.0.0", "to": "187.183.255.255", "isp": "claro", "loc":"br"}, - {"from": "188.239.0.0", "to": "188.239.63.255", "isp": "huawei", "loc":"sg"}, - {"from": "189.1.192.0", "to": "189.1.255.255", "isp": "huawei", "loc":"hk"}, - {"from": "189.76.0.0", "to": "189.127.255.255", "isp": "hardonline", "loc":"br"}, - {"from": "190.92.192.0", "to": "190.92.255.255", "isp": "huawei", "loc":"hk"}, - {"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "inspire", "loc":"br"} + {"from": "8.127.0.0", "to": "8.223.255.255", "isp": "Alibaba", "loc":"cn"}, + {"from": "24.240.0.0", "to": "24.243.255.255", "isp": "Charter", "loc":"us"}, + {"from": "27.106.0.0", "to": "27.106.127.255", "isp": "Huawei", "loc":"hk"}, + {"from": "34.0.0.0", "to": "34.191.255.255", "isp": "Google LLC", "loc":"zz"}, + {"from": "45.224.0.0", "to": "45.227.195.255", "isp": "Various BR ISPs", "loc": "br"}, + {"from": "46.250.160.0", "to": "46.250.191.255", "isp": "Huawei", "loc":"mx"}, + {"from": "49.0.200.0", "to": "49.0.255.255", "isp": "Huawei", "loc":"sg"}, + {"from": "94.74.64.0", "to": "94.74.127.255", "isp": "Huawei", "loc":"hk"}, + {"from": "101.0.0.0", "to": "119.255.255.255", "isp": "SinNet", "loc":"cn"}, + {"from": "111.119.192.0", "to": "111.119.255.255", "isp": "Huawei", "loc":"sg"}, + {"from": "119.0.0.0", "to": "101.207.255.255", "isp": "Unicom", "loc":"cn"}, + {"from": "121.91.168.0", "to": "121.91.175.255", "isp": "Huawei", "loc":"hk"}, + {"from": "122.8.0.0", "to": "122.8.255.255", "isp": "CN-ISP", "loc":"cn"}, + {"from": "122.9.0.0", "to": "122.9.255.255", "isp": "Huawei", "loc":"cn"}, + {"from": "124.243.128.0", "to": "124.243.191.255", "isp": "Huawei", "loc":"sg"}, + {"from": "150.40.128.0", "to": "150.40.255.255", "isp": "Huawei", "loc":"hk"}, + {"from": "159.138.0.0", "to": "159.138.225.255", "isp": "Huawei", "loc":"th"}, + {"from": "162.128.0.0", "to": "162.128.255.255", "isp": "Zenlayer", "loc":"sg"}, + {"from": "166.108.192.0", "to": "166.108.255.255", "isp": "Huawei", "loc":"sg"}, + {"from": "177.0.0.0", "to": "177.223.255.255", "isp": "THS", "loc":"br"}, + {"from": "183.87.32.0", "to": "183.87.159.255", "isp": "Huawei", "loc":"hk"}, + {"from": "187.180.0.0", "to": "187.183.255.255", "isp": "Claro", "loc":"br"}, + {"from": "188.239.0.0", "to": "188.239.63.255", "isp": "Huawei", "loc":"sg"}, + {"from": "189.1.192.0", "to": "189.1.255.255", "isp": "Huawei", "loc":"hk"}, + {"from": "189.76.0.0", "to": "189.127.255.255", "isp": "Hard Online", "loc":"br"}, + {"from": "190.92.192.0", "to": "190.92.255.255", "isp": "Huawei", "loc":"hk"}, + {"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "Inspire", "loc":"br"}, + {"from": "2600:1f00:0000:0000:0000:0000:0000:0000", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "isp": "Amazon Cloud", "loc":"us"}, + {"from": "2001:4800:0000:0000:0000:0000:0000:0000", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "isp": "Rackspace/Google", "loc":"us"} ] } \ No newline at end of file diff --git a/img/fileview.svg b/img/fileview.svg new file mode 100644 index 0000000..abac089 --- /dev/null +++ b/img/fileview.svg @@ -0,0 +1 @@ +View only \ No newline at end of file diff --git a/plugin.info.txt b/plugin.info.txt index 1505946..3c60c14 100644 --- a/plugin.info.txt +++ b/plugin.info.txt @@ -1,7 +1,7 @@ base botmon author Sascha Leib email ad@hominem.com -date 2025-09-06 +date 2025-09-07 name Bot Monitoring desc Live monitoring of bot traffic on your DokuWiki instance (under development) url https://www.dokuwiki.org/plugin:botmon diff --git a/script.js b/script.js index e64d12d..a7c7b9c 100644 --- a/script.js +++ b/script.js @@ -100,6 +100,40 @@ const BotMon = { } else { /* IP4 */ return Number(ip.split('.').map(d => ('000'+d).slice(-3) ).join('')); } + }, + + /* helper function to format a Date object to show only the time. */ + /* returns String */ + _formatTime: function(date) { + + if (date) { + return ('0'+date.getHours()).slice(-2) + ':' + ('0'+date.getMinutes()).slice(-2) + ':' + ('0'+date.getSeconds()).slice(-2); + } else { + return null; + } + + }, + + /* helper function to show a time difference in seconds or minutes */ + /* returns String */ + _formatTimeDiff: function(dateA, dateB) { + + // if the second date is ealier, swap them: + if (dateA > dateB) dateB = [dateA, dateA = dateB][0]; + + // get the difference in milliseconds: + let ms = dateB - dateA; + + if (ms > 50) { /* ignore small time spans */ + const h = Math.floor((ms / (1000 * 60 * 60)) % 24); + const m = Math.floor((ms / (1000 * 60)) % 60); + const s = Math.floor((ms / 1000) % 60); + + return ( h>0 ? h + 'h ': '') + ( m>0 ? m + 'm ': '') + ( s>0 ? s + 's': ''); + } + + return null; + } } }; @@ -208,38 +242,40 @@ BotMon.live = { // shortcut to make code more readable: const model = BotMon.live.data.model; + const timeout = 60 * 60 * 1000; /* session timeout: One hour */ + // loop over all visitors already registered: for (let i=0; i nv.ts) { visitor._firstSeen = nv.ts; } } @@ -306,6 +342,7 @@ BotMon.live = { prereg._lastSeen = nv.ts; // increase view count: prereg._viewCount += 1; + prereg._tickCount += 1; } // update referrer state: @@ -336,7 +373,9 @@ BotMon.live = { } if (visitor) { - visitor._lastSeen = dat.ts; + if (visitor._lastSeen < dat.ts) { + visitor._lastSeen = dat.ts; + } if (!visitor._seenBy.includes(type)) { visitor._seenBy.push(type); } @@ -355,6 +394,7 @@ BotMon.live = { prereg = model._makePageView(dat, type); visitor._pageViews.push(prereg); } + prereg._tickCount += 1; }, // updating visit data from the ticker log: @@ -380,7 +420,7 @@ BotMon.live = { // get the page view info: let pv = model._getPageView(visitor, dat); if (!pv) { - console.warn(`No page view for visit ID ${dat.id}, page ${dat.pg}, registering a new one.`); + console.warn(`No page view for visit ID “${dat.id}”, page “${dat.pg}”, registering a new one.`); pv = model._makePageView(dat, type); visitor._pageViews.push(pv); } @@ -451,6 +491,7 @@ BotMon.live = { // shortcut to make code more readable: const model = BotMon.live.data.model; + const me = BotMon.live.data.analytics; BotMon.live.gui.status.showBusy("Analysing data …"); @@ -481,6 +522,11 @@ BotMon.live = { v._eval = e.rules; v._botVal = e.val; + // add each page view to IP range information (unless it is already from a known bot IP range): + v._pageViews.forEach( pv => { + me._addToIPRanges(pv.ip); + }); + if (e.isBot) { // likely bots v._type = BM_USERTYPE.LIKELY_BOT; this.data.bots.suspected += v._pageViews.length; @@ -496,9 +542,55 @@ BotMon.live = { }); BotMon.live.gui.status.hideBusy('Done.'); + console.log(BotMon.live.data.analytics._ipRange); + + }, + + // visits from IP ranges: + _ipRange: { + ip4: [], + ip6: [] + }, + /** + * Adds a visit to the IP range statistics. + * + * This helps to identify IP ranges that are used by bots. + * + * @param {string} ip The IP address to add. + */ + _addToIPRanges: function(ip) { + + const me = BotMon.live.data.analytics; + const ipv = (ip.indexOf(':') > 0 ? 6 : 4); + + const ipArr = ip.split( ipv == 6 ? ':' : '.'); + const maxSegments = (ipv == 6 ? 4 : 3); + + let arr = (ipv == 6 ? me._ipRange.ip6 : me._ipRange.ip4); + let it = null; + for (let i = 0; i < Math.min(ipArr.length, maxSegments); i++) { + it = arr.find( a => { a.seg == ipArr[i]; } ); + if (!it) { + it = {seg: ipArr[i], count: 1}; + if (i= minItems) { + //console.log('loadSpeed', visitor._pageViews.length, minItems, maxTime); + + const pvArr = visitor._pageViews.map(pv => pv._lastSeen).sort(); + + let totalTime = 0; + for (let i=1; i < pvArr.length; i++) { + totalTime += (pvArr[i] - pvArr[i-1]); + } + + //console.log(' ', totalTime , Math.round(totalTime / (pvArr.length * 1000)), (( totalTime / pvArr.length ) <= maxTime * 1000), visitor.ip); + + return (( totalTime / pvArr.length ) <= maxTime * 1000); + } } }, @@ -1051,7 +1159,7 @@ BotMon.live = { for (let i=0; i < Math.min(bots.length, 4); i++) { const dd = makeElement('dd'); - dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id}, bots[i]._bot.n)); + dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n)); dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length)); block.appendChild(dd); } @@ -1295,7 +1403,7 @@ BotMon.live = { dl.appendChild(make('dd', {'class': 'has_icon ip' + data.typ}, data.id)); } - if ((data._lastSeen - data._firstSeen) < 1) { + if (Math.abs(data._lastSeen - data._firstSeen) < 100) { dl.appendChild(make('dt', {}, "Seen:")); dl.appendChild(make('dd', {'class': 'seen'}, data._firstSeen.toLocaleString())); } else { @@ -1339,16 +1447,28 @@ BotMon.live = { }, "No referer")); } pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount)); - pgLi.appendChild(make('span', {}, page._firstSeen.toLocaleString())); - pgLi.appendChild(make('span', {}, page._lastSeen.toLocaleString())); + pgLi.appendChild(make('span', {}, BotMon.t._formatTime(page._firstSeen))); + + // get the time difference: + const tDiff = BotMon.t._formatTimeDiff(page._firstSeen, page._lastSeen); + if (tDiff) { + pgLi.appendChild(make('span', {'class': 'visit-length', 'title': 'Last seen: ' + page._lastSeen.toLocaleString()}, tDiff)); + } else { + pgLi.appendChild(make('span', {'class': 'bounce'}, "Bounce")); + } + pageList.appendChild(pgLi); }); pagesDd.appendChild(pageList); dl.appendChild(pagesDd); - /* add bot evaluation: */ + /* bot evaluation rating */ + dl.appendChild(make('dt', undefined, "Bot rating:")); + dl.appendChild(make('dd', {'class': 'bot-rating'}, data._botVal + '/' + BotMon.live.data.rules._threshold )); + + /* add bot evaluation details: */ if (data._eval) { - dl.appendChild(make('dt', {}, "Evaluation:")); + dl.appendChild(make('dt', {}, "Bot evaluation details:")); const evalDd = make('dd'); const testList = make('ul',{ 'class': 'eval' @@ -1379,9 +1499,9 @@ BotMon.live = { const tst2Li = make('li', { 'class': 'total' }); - tst2Li.appendChild(make('span', {}, "Total:")); + /*tst2Li.appendChild(make('span', {}, "Total:")); tst2Li.appendChild(make('span', {}, data._botVal)); - testList.appendChild(tst2Li); + testList.appendChild(tst2Li);*/ evalDd.appendChild(testList); dl.appendChild(evalDd); diff --git a/style.less b/style.less index 86bd864..bbd9d97 100644 --- a/style.less +++ b/style.less @@ -193,9 +193,14 @@ white-space: nowrap; } dd { - grid-column: 2; - display: inline-block; - background-color: transparent; + & { + grid-column: 2; + display: inline-block; + background-color: transparent; + } + &.bot-rating { + text-align: right; + } } dd.pages { & { @@ -207,6 +212,22 @@ justify-content: space-between; align-items: center; } + span { + &.visit-length { + min-width: min-content; + } + &.bounce { + width: 1.25em; height: 1.25em; + overflow: hidden; + } + &.bounce::before { + display: inline-block; + content: ''; + width: 1.25em; height: 1.25em; + background: transparent url('img/bounce.svg') center no-repeat; + background-size: 1.25em; + } + } } } }