diff --git a/admin.php b/admin.php index 9ce6a51..10d9dec 100644 --- a/admin.php +++ b/admin.php @@ -35,30 +35,76 @@ class admin_plugin_botmon extends AdminPlugin { $pluginPath = $conf['basedir'] . 'lib/plugins/' . $this->getPluginName(); /* Plugin Headline */ - echo '
'; - echo '

Bot Monitoring Plugin

'; + echo '
+

Bot Monitoring Plugin

+ '; - /* tab navigation */ - echo ''; + if ($this->hasOldLogFiles()) { + echo '
Note: There are old log files that can be deleted. Click here to run a delete script, or use cron to automatically delete them.
'; + } - // Beta warning message: - echo '
Please note: This plugin is still in the early stages of development and does not (yet) clean up its logs directory.
You can clean up the old log files by clicking here, or by adding the cleanup script to your cron jobs.
'; - - /* Live tab */ - echo '
'; - echo '

Today

'; - echo '
Loading …
'; - echo '
'; - echo '
Visitor logs'; - echo '
'; - echo '
'; - echo '
busy indicatorInitialising …
'; - echo '
'; - echo '
'; + echo '
+

Today

+
Loading …
+
+
+ Bot overview +
+
+
+
+
+
+
+ Web metrics +
+
+
+
+
+
+
+ Visitor logs +
+
+
+ +
+
'; } + + /** + * Check if there are old log files that can be deleted. + * + * @return bool true if there are old log files, false otherwise + */ + private function hasOldLogFiles() { + + $today = gmdate('Y-m-d'); + $yesterday = gmdate('Y-m-d', time() - 86400); + + // scan the log directory for any file that is not today's or yesterday's log; nothing is deleted here, the first such file makes the method return true. NOTE(review): scandir() returns false when the directory cannot be read and the path depends on getcwd() being the wiki root - confirm both: + $dir = scandir(getcwd() . '/lib/plugins/botmon/logs'); + foreach($dir as $file) { + $fName = pathinfo($file, PATHINFO_BASENAME); + $bName = strtok($fName, '.'); + + if ($bName == '' || $bName == 'logfiles') { + // ignore: empty base name (e.g. the "." and ".." entries) or an entry whose base name is "logfiles" + } else if ($bName == $today || $bName == $yesterday) { + // skip: log of the current or the previous day (UTC dates from gmdate) + } else { + return true; + } + } + return false; + } } \ No newline at end of file diff --git a/cleanup.php b/cleanup.php index ea97973..a931512 100644 --- a/cleanup.php +++ b/cleanup.php @@ -22,4 +22,5 @@ foreach($dir as $file) { echo " File “{$fName}” could not be deleted!\n"; } } -} \ No newline at end of file +} +echo "Done.\n"; \ No newline at end of file diff --git a/data/known-bots.json b/config/known-bots.json similarity index 100% rename from data/known-bots.json rename to config/known-bots.json diff --git a/data/known-clients.json b/config/known-clients.json similarity index 100% rename from data/known-clients.json rename to config/known-clients.json diff --git a/data/known-platforms.json b/config/known-platforms.json similarity index 100% rename from data/known-platforms.json rename to config/known-platforms.json diff --git a/data/rules.json b/config/rules.json similarity index 79% rename from data/rules.json rename to config/rules.json index 27dc207..06c3560 100644 --- a/data/rules.json +++ b/config/rules.json @@ -29,9 +29,9 @@ "id": "noRefs", "desc": "No referer field", "bot": 30 }, - {"func": "matchLang", "params": ["en"], - "id": "langMatch", "desc": "Client’s ‘Accept-Language’ header does not match the page language (except English pages)", - "bot": 20 + {"func": "matchLang", "params": [], + "id": "langMatch", "desc": 
"Client’s ‘Accept-Language’ header does not match the page language", + "bot": 30 }, {"func": "matchesClient", "params": ["brave"], "id": "susClient", "desc": "Client identifier that is popular with bot networks", @@ -54,12 +54,12 @@ "bot": 80 }, {"func": "noAcceptLang", - "id": "noAcc", "desc": "Client has no, or an empty, “Accept-Language” header", + "id": "noAcc", "desc": "No “Accept-Language” header", "bot": 40 } ], "ipRanges": [ - {"from": "3.0.0.0", "to": "3.255.255.255", "label": "Amazon Data Services [US]"}, + {"from": "3.0.0.0", "to": "3.255.255.254", "label": "Amazon Data Services [US]"}, {"from": "8.127.0.0", "to": "8.223.255.254", "label": "Alibaba [CN]"}, {"from": "24.240.0.0", "to": "24.243.255.254", "label": "Charter [US]"}, {"from": "27.106.0.0", "to": "27.106.127.254", "label": "Huawei [US]"}, @@ -70,8 +70,8 @@ {"from": "84.37.35.0", "to": "84.37.255.254", "label": "GTT.net [US]"}, {"from": "94.74.64.0", "to": "94.74.127.254", "label": "Huawei [HK]"}, {"from": "101.0.0.0", "to": "101.255.255.254", "label": "ChinaNet [CN]"}, - {"from": "110.238.96.0", "to": "110.238.127.255", "label": "Huawei [SG]"}, - {"from": "111.119.192.0", "to": "111.119.255.255", "label": "Huawei [SG]"}, + {"from": "110.238.96.0", "to": "110.238.127.254", "label": "Huawei [SG]"}, + {"from": "111.119.192.0", "to": "111.119.255.254", "label": "Huawei [SG]"}, {"from": "119.0.0.0", "to": "101.207.255.254", "label": "Unicom [CN]"}, {"from": "121.91.168.", "to": "121.91.175.254", "label": "Huawei [HK]"}, {"from": "122.8.0.0", "to": "122.8.255.254", "label": "CN-ISP [CN]"}, @@ -82,14 +82,17 @@ {"from": "159.138.0.0", "to": "159.138.225.254", "label": "Huawei [TH]"}, {"from": "162.128.0.0", "to": "162.128.255.254", "label": "Zenlayer [SG]"}, {"from": "166.108.192.0", "to": "166.108.255.254", "label": "Huawei [SG]"}, - {"from": "177.0.0.0", "to": "177.255.255.255", "label": "BrasilNET [BR]"}, + {"from": "177.0.0.0", "to": "177.255.255.254", "label": "BrasilNET [BR]"}, + {"from": 
"179.0.0.0", "to": "179.255.255.254", "label": "BrasilNET [BR]"}, {"from": "183.87.32.0", "to": "183.87.159.254", "label": "Huawei [HK]"}, - {"from": "187.180.0.0", "to": "187.183.255.254", "label": "Claro [BR]"}, - {"from": "188.239.0.0", "to": "188.239.63.254", "label": "Huawei [SG]"}, - {"from": "189.127.255.255", "to": "189.127.255.255", "label": "Huawei BR/HK"}, - {"from": "190.92.192.0", "to": "190.92.255.254", "label": "Huawei [HK]"}, + {"from": "186.0.0.0", "to": "186.255.255.254", "label": "South-American ISPs (186.x)"}, + {"from": "187.0.0.0", "to": "187.255.255.254", "label": "South-American ISPs (187.x)"}, + {"from": "188.0.0.0", "to": "188.255.255.254", "label": "South-American ISPs (188.x)"}, + {"from": "189.0.0.0", "to": "189.255.255.254", "label": "South-American ISPs (189.x)"}, + {"from": "190.0.0.0", "to": "190.255.255.254", "label": "South-American ISPs (190.x)"}, {"from": "192.124.170.0", "to": "192.124.182.254", "label": "Relcom [CZ]"}, {"from": "2001:4800::::::", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Rackspace/Google [US]"}, + {"from": "2001:0ee0::::::", "to": "2001:ee3:ffff:ffff:ffff:ffff:ffff:ffff", "mask": 30, "label": "VNPT [VN]"}, {"from": "2600:1f00::::::", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Amazon Cloud [US]"}, {"from": "2804:::::::", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Inspire [BR]"}, {"from": "2a0a:4cc0::::::", "to": "2a0a:4cc0:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Netcup [DE]"} diff --git a/plugin.info.txt b/plugin.info.txt index 525b0b4..060d288 100644 --- a/plugin.info.txt +++ b/plugin.info.txt @@ -1,7 +1,7 @@ base botmon author Sascha Leib email ad@hominem.com -date 2025-09-08 +date 2025-09-09 name Bot Monitoring desc Live monitoring of bot traffic on your DokuWiki instance (under development) url https://www.dokuwiki.org/plugin:botmon diff --git a/script.js b/script.js index 75fa890..41d16e7 100644 --- a/script.js +++ b/script.js @@ -541,10 +541,6 @@ 
BotMon.live = { } }); - // clean up the ip ranges: - me._cleanIPRanges(); - console.log(BotMon.live.data.analytics._ipRange); - BotMon.live.gui.status.hideBusy('Done.'); }, @@ -562,6 +558,7 @@ BotMon.live = { */ _addToIPRanges: function(ip) { + // #TODO: handle nestled ranges! const me = BotMon.live.data.analytics; const ipv = (ip.indexOf(':') > 0 ? 6 : 4); @@ -592,16 +589,39 @@ BotMon.live = { } }, - _cleanIPRanges: function() { - const me = BotMon.live.data.analytics; - - for (const [n, arr] of Object.entries(me._ipRange)) { - - arr.forEach( (it, i) => { - if (it.count <= 1) arr.splice(i, 1); - }); + getTopBotIPRanges: function(max) { - }; + const me = BotMon.live.data.analytics; + + const kMinHits = 2; + + // combine the ip lists, removing all lower volume branches: + let ipTypes = [4,6]; + const tmpList = []; + for (let i=0; i { + if (it.count > kMinHits) { + it.type = ipType; + tmpList.push(it); + } + }); + tmpList.sort( (a,b) => b.count - a.count); + } + + // reduce to only the top (max) items and create the target format: + // #TODO: handle nestled ranges! + let rList = []; + for (let j=0; Math.min(max, tmpList.length) > j; j++) { + const rangeInfo = tmpList[j]; + rList.push({ + 'ip': rangeInfo.seg + ( rangeInfo.type == 4 ? 
'.x.x.x' : '::x'), + 'typ': rangeInfo.type, + 'num': rangeInfo.count + }); + } + + return rList; } }, @@ -612,7 +632,7 @@ BotMon.live = { // Load the list of known bots: BotMon.live.gui.status.showBusy("Loading known bots …"); - const url = BotMon._baseDir + 'data/known-bots.json'; + const url = BotMon._baseDir + 'config/known-bots.json'; try { const response = await fetch(url); if (!response.ok) { @@ -687,7 +707,7 @@ BotMon.live = { // Load the list of known bots: BotMon.live.gui.status.showBusy("Loading known clients"); - const url = BotMon._baseDir + 'data/known-clients.json'; + const url = BotMon._baseDir + 'config/known-clients.json'; try { const response = await fetch(url); if (!response.ok) { @@ -747,7 +767,7 @@ BotMon.live = { // Load the list of known bots: BotMon.live.gui.status.showBusy("Loading known platforms"); - const url = BotMon._baseDir + 'data/known-platforms.json'; + const url = BotMon._baseDir + 'config/known-platforms.json'; try { const response = await fetch(url); if (!response.ok) { @@ -806,7 +826,7 @@ BotMon.live = { // Load the list of known bots: BotMon.live.gui.status.showBusy("Loading list of rules …"); - const url = BotMon._baseDir + 'data/rules.json'; + const url = BotMon._baseDir + 'config/rules.json'; try { const response = await fetch(url); if (!response.ok) { @@ -1138,52 +1158,105 @@ BotMon.live = { make: function() { const data = BotMon.live.data.analytics.data; - const parent = document.getElementById('botmon__today__content'); // shortcut for neater code: const makeElement = BotMon.t._makeElement; - if (parent) { + const botsVsHumans = document.getElementById('botmon__today__botsvshumans'); + if (botsVsHumans) { + botsVsHumans.appendChild(makeElement('dt', {}, "Bots vs. Humans (page views)")); - const bounceRate = Math.round(data.totalVisits / data.totalPageViews * 100); - - jQuery(parent).prepend(jQuery(` -
- Overview -
-
-
Web metrics
-
Total page views:${data.totalPageViews}
-
Total visitors (est.):${data.totalVisits}
-
Bounce rate (est.):${bounceRate}%
-
-
-
Bots vs. Humans (page views)
-
Registered users:${data.bots.users}
-
Probably humans:${data.bots.human}
-
Suspected bots:${data.bots.suspected}
-
Known bots:${data.bots.known}
-
-
-
-
- `)); - - // update known bots list: - const block = document.getElementById('botmon__botslist'); - block.innerHTML = "
Top known bots (page views)
"; - - let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => { - return b._pageViews.length - a._pageViews.length; - }); - - for (let i=0; i < Math.min(bots.length, 4); i++) { + for (let i = 3; i >= 0; i--) { const dd = makeElement('dd'); - dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n)); - dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length)); - block.appendChild(dd); + let title = ''; + let value = ''; + switch(i) { + case 0: + title = "Registered users:"; + value = data.bots.users; + break; + case 1: + title = "Probably humans:"; + value = data.bots.human; + break; + case 2: + title = "Suspected bots:"; + value = data.bots.suspected; + break; + case 3: + title = "Known bots:"; + value = data.bots.known; + break; + default: + console.warn(`Unknown list type ${i}.`); + } + dd.appendChild(makeElement('span', {}, title)); + dd.appendChild(makeElement('strong', {}, value)); + botsVsHumans.appendChild(dd); } } + + // update known bots list: + const botlist = document.getElementById('botmon__botslist'); + botlist.innerHTML = "
Top 5 known bots (page views)
"; + + let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => { + return b._pageViews.length - a._pageViews.length; + }); + + for (let i=0; i < Math.min(bots.length, 5); i++) { + const dd = makeElement('dd'); + dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n)); + dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length)); + botlist.appendChild(dd); + } + + // update the suspected bot IP ranges list: + const botIps = document.getElementById('botmon__today__botips'); + if (botIps) { + botIps.appendChild(makeElement('dt', {}, "Top 5 suspected bots’ IP ranges")); + + const ipList = BotMon.live.data.analytics.getTopBotIPRanges(5); + ipList.forEach( (ipInfo) => { + const li = makeElement('dd'); + li.appendChild(makeElement('span', {'class': 'ip ip' + ipInfo.typ }, ipInfo.ip)); + li.appendChild(makeElement('span', {'class': 'count' }, ipInfo.num)); + botIps.append(li) + }) + } + + // update the webmetrics overview: + const wmoverview = document.getElementById('botmon__today__wm_overview'); + if (wmoverview) { + const bounceRate = Math.round(data.totalVisits / data.totalPageViews * 100); + + wmoverview.appendChild(makeElement('dt', {}, "Overview")); + for (let i = 0; i < 3; i++) { + const dd = makeElement('dd'); + let title = ''; + let value = ''; + switch(i) { + case 0: + title = "Total page views:"; + value = data.totalPageViews; + break; + case 1: + title = "Total visitors (est.):"; + value = data.totalVisits; + break; + case 2: + title = "Bounce rate (est.):"; + value = bounceRate + '%'; + break; + default: + console.warn(`Unknown list type ${i}.`); + } + dd.appendChild(makeElement('span', {}, title)); + dd.appendChild(makeElement('strong', {}, value)); + wmoverview.appendChild(dd); + } + } + } },