Bot IP ranges overview and cleanups

This commit is contained in:
Sascha Leib
2025-09-10 00:02:42 +02:00
parent 43b05c9311
commit 0c03961572
8 changed files with 214 additions and 91 deletions

View File

@@ -35,30 +35,76 @@ class admin_plugin_botmon extends AdminPlugin {
$pluginPath = $conf['basedir'] . 'lib/plugins/' . $this->getPluginName();
/* Plugin Headline */
echo '<div id="botmon__admin">';
echo '<h1>Bot Monitoring Plugin</h1>';
echo '<div id="botmon__admin">
<h1>Bot Monitoring Plugin</h1>
<nav id="botmon__tabs">
<ul class="tabs" role="tablist">
<li role="presentation" class="active"><a role="tab" href="#botmon__panel1" aria-controls="botmon__panel1" id="botmon__tab1" aria-selected="true">Today</a></li>
</ul>
</nav>';
/* tab navigation */
echo '<nav id="botmon__tabs">';
echo '<ul class="tabs" role="tablist">';
echo '<li role="presentation" class="active">';
echo '<a role="tab" href="#botmon__panel1" aria-controls="botmon__panel1" id="botmon__tab1" aria-selected="true">Today</a></li>';
echo '</ul></nav>';
if ($this->hasOldLogFiles()) {
echo '<div class="info"><strong>Note:</strong> There are old log files that can be deleted. <a href="' . $pluginPath . '/cleanup.php" target="_blank">Click here</a> to run a delete script, or use <em>cron</em> to automatically delete them.</div>';
}
// Beta warning message:
echo '<div class="info"><strong>Please note:</strong> This plugin is still in the early stages of development and does not (yet) clean up its <code>logs</code> directory.<br>You can clean up the old log files by <a href="' . $pluginPath . '/cleanup.php" target="_blank">clicking here</a>, or by adding the cleanup script to your cron jobs.</div>';
/* Live tab */
echo '<article role="tabpanel" id="botmon__today"">';
echo '<h2 class="a11y">Today</h2>';
echo '<header id="botmon__today__title">Loading&nbsp;&hellip;</header>';
echo '<div id="botmon__today__content">';
echo '<details id="botmon__today__visitors"><summary>Visitor logs</summary>';
echo '<div id="botmon__today__visitorlists"></div>';
echo '</details></div>';
echo '<footer aria-live="polite"><img src="' . $pluginPath . '/img/spinner.svg" id="botmon__today__busy" width="12" height="12" alt="busy indicator"><span id="botmon__today__status">Initialising&nbsp;&hellip;</span></footer>';
echo '</article>';
echo '</div><!-- End of BotMon Admin Tool -->';
echo '<article role="tabpanel" id="botmon__today"">
<h2 class="a11y">Today</h2>
<header id="botmon__today__title">Loading&nbsp;&hellip;</header>
<div id="botmon__today__content">
<details id="botmon__today__overview" open>
<summary>Bot overview</summary>
<div class="grid-3-columns">
<dl id="botmon__today__botsvshumans"></dl>
<dl id="botmon__botslist"></dl>
<dl id="botmon__today__botips"></dl>
</div>
</details>
<details id="botmon__today__webmetrics">
<summary>Web metrics</summary>
<div class="grid-3-columns">
<dl id="botmon__today__wm_overview"></dl>
<dl></dl>
<dl></dl>
</div>
</details>
<details id="botmon__today__visitors">
<summary>Visitor logs</summary>
<div id="botmon__today__visitorlists"></div>
</details>
</div>
<footer aria-live="polite">
<img src="' . $pluginPath . '/img/spinner.svg" id="botmon__today__busy" width="12" height="12" alt="busy indicator">
<span id="botmon__today__status">Initialising&nbsp;&hellip;</span>
</footer>
</article>
</div><!-- End of BotMon Admin Tool -->';
}
/**
 * Check if there are old log files that can be deleted.
 *
 * A directory entry counts as an "old log file" when the part of its name
 * before the first dot is neither today's nor yesterday's date (UTC,
 * formatted as Y-m-d). Entries without such a base name (e.g. "." and
 * "..") and entries whose base name is "logfiles" are ignored.
 *
 * Nothing is deleted here; this only reports whether a cleanup is worthwhile.
 *
 * @return bool true if there are old log files, false otherwise
 *              (also false when the logs directory cannot be read)
 */
private function hasOldLogFiles() {

	$today = gmdate('Y-m-d');
	$yesterday = gmdate('Y-m-d', time() - 86400);

	// The path is resolved relative to the current working directory,
	// exactly as before.
	$logDir = getcwd() . '/lib/plugins/botmon/logs';

	// scandir() returns false (and raises a warning) for a missing or
	// unreadable directory; treat that as "nothing to clean up" instead of
	// feeding false into foreach.
	if (!is_dir($logDir)) {
		return false;
	}
	$entries = scandir($logDir);
	if ($entries === false) {
		return false;
	}

	// look for any file that belongs to neither today nor yesterday:
	foreach ($entries as $file) {
		$fName = pathinfo($file, PATHINFO_BASENAME);
		$bName = strtok($fName, '.');

		// strtok() yields false when the name consists only of dots ("." / "..")
		if ($bName === false || $bName === '' || $bName === 'logfiles') {
			continue; // not a dated log file
		}
		if ($bName === $today || $bName === $yesterday) {
			continue; // still current
		}

		// anything else is an outdated log file
		return true;
	}
	return false;
}
}

View File

@@ -22,4 +22,5 @@ foreach($dir as $file) {
echo " File “{$fName}” could not be deleted!\n";
}
}
}
}
echo "Done.\n";

View File

@@ -29,9 +29,9 @@
"id": "noRefs", "desc": "No referer field",
"bot": 30
},
{"func": "matchLang", "params": ["en"],
"id": "langMatch", "desc": "Clients Accept-Language header does not match the page language (except English pages)",
"bot": 20
{"func": "matchLang", "params": [],
"id": "langMatch", "desc": "Clients Accept-Language header does not match the page language",
"bot": 30
},
{"func": "matchesClient", "params": ["brave"],
"id": "susClient", "desc": "Client identifier that is popular with bot networks",
@@ -54,12 +54,12 @@
"bot": 80
},
{"func": "noAcceptLang",
"id": "noAcc", "desc": "Client has no, or an empty, “Accept-Language” header",
"id": "noAcc", "desc": "No “Accept-Language” header",
"bot": 40
}
],
"ipRanges": [
{"from": "3.0.0.0", "to": "3.255.255.255", "label": "Amazon Data Services [US]"},
{"from": "3.0.0.0", "to": "3.255.255.254", "label": "Amazon Data Services [US]"},
{"from": "8.127.0.0", "to": "8.223.255.254", "label": "Alibaba [CN]"},
{"from": "24.240.0.0", "to": "24.243.255.254", "label": "Charter [US]"},
{"from": "27.106.0.0", "to": "27.106.127.254", "label": "Huawei [US]"},
@@ -70,8 +70,8 @@
{"from": "84.37.35.0", "to": "84.37.255.254", "label": "GTT.net [US]"},
{"from": "94.74.64.0", "to": "94.74.127.254", "label": "Huawei [HK]"},
{"from": "101.0.0.0", "to": "101.255.255.254", "label": "ChinaNet [CN]"},
{"from": "110.238.96.0", "to": "110.238.127.255", "label": "Huawei [SG]"},
{"from": "111.119.192.0", "to": "111.119.255.255", "label": "Huawei [SG]"},
{"from": "110.238.96.0", "to": "110.238.127.254", "label": "Huawei [SG]"},
{"from": "111.119.192.0", "to": "111.119.255.254", "label": "Huawei [SG]"},
{"from": "119.0.0.0", "to": "119.207.255.254", "label": "Unicom [CN]"},
{"from": "121.91.168.0", "to": "121.91.175.254", "label": "Huawei [HK]"},
{"from": "122.8.0.0", "to": "122.8.255.254", "label": "CN-ISP [CN]"},
@@ -82,14 +82,17 @@
{"from": "159.138.0.0", "to": "159.138.225.254", "label": "Huawei [TH]"},
{"from": "162.128.0.0", "to": "162.128.255.254", "label": "Zenlayer [SG]"},
{"from": "166.108.192.0", "to": "166.108.255.254", "label": "Huawei [SG]"},
{"from": "177.0.0.0", "to": "177.255.255.255", "label": "BrasilNET [BR]"},
{"from": "177.0.0.0", "to": "177.255.255.254", "label": "BrasilNET [BR]"},
{"from": "179.0.0.0", "to": "179.255.255.254", "label": "BrasilNET [BR]"},
{"from": "183.87.32.0", "to": "183.87.159.254", "label": "Huawei [HK]"},
{"from": "187.180.0.0", "to": "187.183.255.254", "label": "Claro [BR]"},
{"from": "188.239.0.0", "to": "188.239.63.254", "label": "Huawei [SG]"},
{"from": "189.127.255.255", "to": "189.127.255.255", "label": "Huawei BR/HK"},
{"from": "190.92.192.0", "to": "190.92.255.254", "label": "Huawei [HK]"},
{"from": "186.0.0.0", "to": "186.255.255.254", "label": "South-American ISPs (186.x)"},
{"from": "187.0.0.0", "to": "187.255.255.254", "label": "South-American ISPs (187.x)"},
{"from": "188.0.0.0", "to": "188.255.255.254", "label": "South-American ISPs (188.x)"},
{"from": "189.0.0.0", "to": "189.255.255.254", "label": "South-American ISPs (189.x)"},
{"from": "190.0.0.0", "to": "190.255.255.254", "label": "South-American ISPs (190.x)"},
{"from": "192.124.170.0", "to": "192.124.182.254", "label": "Relcom [CZ]"},
{"from": "2001:4800::::::", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Rackspace/Google [US]"},
{"from": "2001:0ee0::::::", "to": "2001:ee3:ffff:ffff:ffff:ffff:ffff:ffff", "mask": 30, "label": "VNPT [VN]"},
{"from": "2600:1f00::::::", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Amazon Cloud [US]"},
{"from": "2804:::::::", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Inspire [BR]"},
{"from": "2a0a:4cc0::::::", "to": "2a0a:4cc0:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Netcup [DE]"}

View File

@@ -1,7 +1,7 @@
base botmon
author Sascha Leib
email ad@hominem.com
date 2025-09-08
date 2025-09-09
name Bot Monitoring
desc Live monitoring of bot traffic on your DokuWiki instance (under development)
url https://www.dokuwiki.org/plugin:botmon

183
script.js
View File

@@ -541,10 +541,6 @@ BotMon.live = {
}
});
// clean up the ip ranges:
me._cleanIPRanges();
console.log(BotMon.live.data.analytics._ipRange);
BotMon.live.gui.status.hideBusy('Done.');
},
@@ -562,6 +558,7 @@ BotMon.live = {
*/
_addToIPRanges: function(ip) {
// #TODO: handle nestled ranges!
const me = BotMon.live.data.analytics;
const ipv = (ip.indexOf(':') > 0 ? 6 : 4);
@@ -592,16 +589,39 @@ BotMon.live = {
}
},
_cleanIPRanges: function() {
const me = BotMon.live.data.analytics;
for (const [n, arr] of Object.entries(me._ipRange)) {
arr.forEach( (it, i) => {
if (it.count <= 1) arr.splice(i, 1);
});
getTopBotIPRanges: function(max) {
};
const me = BotMon.live.data.analytics;
const kMinHits = 2;
// combine the ip lists, removing all lower volume branches:
let ipTypes = [4,6];
const tmpList = [];
for (let i=0; i<ipTypes.length; i++) {
const ipType = ipTypes[i];
(ipType == 6 ? me._ipRange.ip6 : me._ipRange.ip4).forEach( it => {
if (it.count > kMinHits) {
it.type = ipType;
tmpList.push(it);
}
});
tmpList.sort( (a,b) => b.count - a.count);
}
// reduce to only the top (max) items and create the target format:
// #TODO: handle nestled ranges!
let rList = [];
for (let j=0; Math.min(max, tmpList.length) > j; j++) {
const rangeInfo = tmpList[j];
rList.push({
'ip': rangeInfo.seg + ( rangeInfo.type == 4 ? '.x.x.x' : '::x'),
'typ': rangeInfo.type,
'num': rangeInfo.count
});
}
return rList;
}
},
@@ -612,7 +632,7 @@ BotMon.live = {
// Load the list of known bots:
BotMon.live.gui.status.showBusy("Loading known bots …");
const url = BotMon._baseDir + 'data/known-bots.json';
const url = BotMon._baseDir + 'config/known-bots.json';
try {
const response = await fetch(url);
if (!response.ok) {
@@ -687,7 +707,7 @@ BotMon.live = {
// Load the list of known bots:
BotMon.live.gui.status.showBusy("Loading known clients");
const url = BotMon._baseDir + 'data/known-clients.json';
const url = BotMon._baseDir + 'config/known-clients.json';
try {
const response = await fetch(url);
if (!response.ok) {
@@ -747,7 +767,7 @@ BotMon.live = {
// Load the list of known bots:
BotMon.live.gui.status.showBusy("Loading known platforms");
const url = BotMon._baseDir + 'data/known-platforms.json';
const url = BotMon._baseDir + 'config/known-platforms.json';
try {
const response = await fetch(url);
if (!response.ok) {
@@ -806,7 +826,7 @@ BotMon.live = {
// Load the list of known bots:
BotMon.live.gui.status.showBusy("Loading list of rules …");
const url = BotMon._baseDir + 'data/rules.json';
const url = BotMon._baseDir + 'config/rules.json';
try {
const response = await fetch(url);
if (!response.ok) {
@@ -1138,52 +1158,105 @@ BotMon.live = {
make: function() {
const data = BotMon.live.data.analytics.data;
const parent = document.getElementById('botmon__today__content');
// shortcut for neater code:
const makeElement = BotMon.t._makeElement;
if (parent) {
const botsVsHumans = document.getElementById('botmon__today__botsvshumans');
if (botsVsHumans) {
botsVsHumans.appendChild(makeElement('dt', {}, "Bots vs. Humans (page views)"));
const bounceRate = Math.round(data.totalVisits / data.totalPageViews * 100);
jQuery(parent).prepend(jQuery(`
<details id="botmon__today__overview" open>
<summary>Overview</summary>
<div class="grid-3-columns">
<dl>
<dt>Web metrics</dt>
<dd><span>Total page views:</span><strong>${data.totalPageViews}</strong></dd>
<dd><span>Total visitors (est.):</span><span>${data.totalVisits}</span></dd>
<dd><span>Bounce rate (est.):</span><span>${bounceRate}%</span></dd>
</dl>
<dl>
<dt>Bots vs. Humans (page views)</dt>
<dd><span>Registered users:</span><strong>${data.bots.users}</strong></dd>
<dd><span>Probably humans:</span><strong>${data.bots.human}</strong></dd>
<dd><span>Suspected bots:</span><strong>${data.bots.suspected}</strong></dd>
<dd><span>Known bots:</span><strong>${data.bots.known}</strong></dd>
</dl>
<dl id="botmon__botslist"></dl>
</div>
</details>
`));
// update known bots list:
const block = document.getElementById('botmon__botslist');
block.innerHTML = "<dt>Top known bots (page views)</dt>";
let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => {
return b._pageViews.length - a._pageViews.length;
});
for (let i=0; i < Math.min(bots.length, 4); i++) {
for (let i = 3; i >= 0; i--) {
const dd = makeElement('dd');
dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n));
dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length));
block.appendChild(dd);
let title = '';
let value = '';
switch(i) {
case 0:
title = "Registered users:";
value = data.bots.users;
break;
case 1:
title = "Probably humans:";
value = data.bots.human;
break;
case 2:
title = "Suspected bots:";
value = data.bots.suspected;
break;
case 3:
title = "Known bots:";
value = data.bots.known;
break;
default:
console.warn(`Unknown list type ${i}.`);
}
dd.appendChild(makeElement('span', {}, title));
dd.appendChild(makeElement('strong', {}, value));
botsVsHumans.appendChild(dd);
}
}
// update known bots list:
const botlist = document.getElementById('botmon__botslist');
botlist.innerHTML = "<dt>Top 5 known bots (page views)</dt>";
let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => {
return b._pageViews.length - a._pageViews.length;
});
for (let i=0; i < Math.min(bots.length, 5); i++) {
const dd = makeElement('dd');
dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n));
dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length));
botlist.appendChild(dd);
}
// update the suspected bot IP ranges list:
const botIps = document.getElementById('botmon__today__botips');
if (botIps) {
botIps.appendChild(makeElement('dt', {}, "Top 5 suspected bots IP ranges"));
const ipList = BotMon.live.data.analytics.getTopBotIPRanges(5);
ipList.forEach( (ipInfo) => {
const li = makeElement('dd');
li.appendChild(makeElement('span', {'class': 'ip ip' + ipInfo.typ }, ipInfo.ip));
li.appendChild(makeElement('span', {'class': 'count' }, ipInfo.num));
botIps.append(li)
})
}
// update the webmetrics overview:
const wmoverview = document.getElementById('botmon__today__wm_overview');
if (wmoverview) {
const bounceRate = Math.round(data.totalVisits / data.totalPageViews * 100);
wmoverview.appendChild(makeElement('dt', {}, "Overview"));
for (let i = 0; i < 3; i++) {
const dd = makeElement('dd');
let title = '';
let value = '';
switch(i) {
case 0:
title = "Total page views:";
value = data.totalPageViews;
break;
case 1:
title = "Total visitors (est.):";
value = data.totalVisits;
break;
case 2:
title = "Bounce rate (est.):";
value = bounceRate + '%';
break;
default:
console.warn(`Unknown list type ${i}.`);
}
dd.appendChild(makeElement('span', {}, title));
dd.appendChild(makeElement('strong', {}, value));
wmoverview.appendChild(dd);
}
}
}
},