Bot IP ranges overview and cleanups
This commit is contained in:
90
admin.php
90
admin.php
@@ -35,30 +35,76 @@ class admin_plugin_botmon extends AdminPlugin {
|
||||
$pluginPath = $conf['basedir'] . 'lib/plugins/' . $this->getPluginName();
|
||||
|
||||
/* Plugin Headline */
|
||||
echo '<div id="botmon__admin">';
|
||||
echo '<h1>Bot Monitoring Plugin</h1>';
|
||||
echo '<div id="botmon__admin">
|
||||
<h1>Bot Monitoring Plugin</h1>
|
||||
<nav id="botmon__tabs">
|
||||
<ul class="tabs" role="tablist">
|
||||
<li role="presentation" class="active"><a role="tab" href="#botmon__panel1" aria-controls="botmon__panel1" id="botmon__tab1" aria-selected="true">Today</a></li>
|
||||
</ul>
|
||||
</nav>';
|
||||
|
||||
/* tab navigation */
|
||||
echo '<nav id="botmon__tabs">';
|
||||
echo '<ul class="tabs" role="tablist">';
|
||||
echo '<li role="presentation" class="active">';
|
||||
echo '<a role="tab" href="#botmon__panel1" aria-controls="botmon__panel1" id="botmon__tab1" aria-selected="true">Today</a></li>';
|
||||
echo '</ul></nav>';
|
||||
if ($this->hasOldLogFiles()) {
|
||||
echo '<div class="info"><strong>Note:</strong> There are old log files that can be deleted. <a href="' . $pluginPath . '/cleanup.php" target="_blank">Click here</a> to run a delete script, or use <em>cron</em> to automatically delete them.</div>';
|
||||
}
|
||||
|
||||
// Beta warning message:
|
||||
echo '<div class="info"><strong>Please note:</strong> This plugin is still in the early stages of development and does not (yet) clean up its <code>logs</code> directory.<br>You can clean up the old log files by <a href="' . $pluginPath . '/cleanup.php" target="_blank">clicking here</a>, or by adding the cleanup script to your cron jobs.</div>';
|
||||
|
||||
/* Live tab */
|
||||
// opening tag of the "Today" tab panel (closed by the separate </article> echo below):
echo '<article role="tabpanel" id="botmon__today">';
|
||||
echo '<h2 class="a11y">Today</h2>';
|
||||
echo '<header id="botmon__today__title">Loading …</header>';
|
||||
echo '<div id="botmon__today__content">';
|
||||
echo '<details id="botmon__today__visitors"><summary>Visitor logs</summary>';
|
||||
echo '<div id="botmon__today__visitorlists"></div>';
|
||||
echo '</details></div>';
|
||||
echo '<footer aria-live="polite"><img src="' . $pluginPath . '/img/spinner.svg" id="botmon__today__busy" width="12" height="12" alt="busy indicator"><span id="botmon__today__status">Initialising …</span></footer>';
|
||||
echo '</article>';
|
||||
echo '</div><!-- End of BotMon Admin Tool -->';
|
||||
// "Today" tab panel. The empty <dl>/<div> containers are placeholders that
// the plugin's script looks up by id and fills in on the client side.
echo '<article role="tabpanel" id="botmon__today">
    <h2 class="a11y">Today</h2>
    <header id="botmon__today__title">Loading …</header>
    <div id="botmon__today__content">
        <details id="botmon__today__overview" open>
            <summary>Bot overview</summary>
            <div class="grid-3-columns">
                <dl id="botmon__today__botsvshumans"></dl>
                <dl id="botmon__botslist"></dl>
                <dl id="botmon__today__botips"></dl>
            </div>
        </details>
        <details id="botmon__today__webmetrics">
            <summary>Web metrics</summary>
            <div class="grid-3-columns">
                <dl id="botmon__today__wm_overview"></dl>
                <dl></dl>
                <dl></dl>
            </div>
        </details>
        <details id="botmon__today__visitors">
            <summary>Visitor logs</summary>
            <div id="botmon__today__visitorlists"></div>
        </details>
    </div>
    <footer aria-live="polite">
        <img src="' . $pluginPath . '/img/spinner.svg" id="botmon__today__busy" width="12" height="12" alt="busy indicator">
        <span id="botmon__today__status">Initialising …</span>
    </footer>
</article>
</div><!-- End of BotMon Admin Tool -->';
|
||||
|
||||
}
|
||||
|
||||
/**
 * Check if there are old log files that can be deleted.
 *
 * An entry of the log directory counts as "old" when the part of its name
 * before the first dot is neither today's nor yesterday's date (UTC, in
 * Y-m-d format). Entries without such a name part (e.g. "." and "..") and
 * entries named "logfiles" are ignored. Nothing is deleted by this method.
 *
 * @return bool true if there are old log files, false otherwise
 */
private function hasOldLogFiles() {

    $today = gmdate('Y-m-d');
    $yesterday = gmdate('Y-m-d', time() - 86400);

    // the log directory is resolved relative to the current working directory:
    $logDir = getcwd() . '/lib/plugins/botmon/logs';

    // a missing or unreadable directory simply means there is nothing to clean up
    // (checking first avoids the warning scandir() raises for a missing path):
    $dir = is_dir($logDir) ? scandir($logDir) : false;
    if ($dir === false) {
        return false;
    }

    foreach ($dir as $file) {
        $fName = pathinfo($file, PATHINFO_BASENAME);
        $bName = strtok($fName, '.'); // false when the name consists of dots only

        if ($bName === false || $bName === '' || $bName === 'logfiles') {
            continue; // not a dated log file
        }
        if ($bName === $today || $bName === $yesterday) {
            continue; // still current
        }

        // anything else is an outdated log file:
        return true;
    }
    return false;
}
|
||||
}
|
||||
@@ -22,4 +22,5 @@ foreach($dir as $file) {
|
||||
echo " File “{$fName}” could not be deleted!\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
echo "Done.\n";
|
||||
@@ -29,9 +29,9 @@
|
||||
"id": "noRefs", "desc": "No referer field",
|
||||
"bot": 30
|
||||
},
|
||||
{"func": "matchLang", "params": ["en"],
|
||||
"id": "langMatch", "desc": "Client’s ‘Accept-Language’ header does not match the page language (except English pages)",
|
||||
"bot": 20
|
||||
{"func": "matchLang", "params": [],
|
||||
"id": "langMatch", "desc": "Client’s ‘Accept-Language’ header does not match the page language",
|
||||
"bot": 30
|
||||
},
|
||||
{"func": "matchesClient", "params": ["brave"],
|
||||
"id": "susClient", "desc": "Client identifier that is popular with bot networks",
|
||||
@@ -54,12 +54,12 @@
|
||||
"bot": 80
|
||||
},
|
||||
{"func": "noAcceptLang",
|
||||
"id": "noAcc", "desc": "Client has no, or an empty, “Accept-Language” header",
|
||||
"id": "noAcc", "desc": "No “Accept-Language” header",
|
||||
"bot": 40
|
||||
}
|
||||
],
|
||||
"ipRanges": [
|
||||
{"from": "3.0.0.0", "to": "3.255.255.255", "label": "Amazon Data Services [US]"},
|
||||
{"from": "3.0.0.0", "to": "3.255.255.254", "label": "Amazon Data Services [US]"},
|
||||
{"from": "8.127.0.0", "to": "8.223.255.254", "label": "Alibaba [CN]"},
|
||||
{"from": "24.240.0.0", "to": "24.243.255.254", "label": "Charter [US]"},
|
||||
{"from": "27.106.0.0", "to": "27.106.127.254", "label": "Huawei [US]"},
|
||||
@@ -70,8 +70,8 @@
|
||||
{"from": "84.37.35.0", "to": "84.37.255.254", "label": "GTT.net [US]"},
|
||||
{"from": "94.74.64.0", "to": "94.74.127.254", "label": "Huawei [HK]"},
|
||||
{"from": "101.0.0.0", "to": "101.255.255.254", "label": "ChinaNet [CN]"},
|
||||
{"from": "110.238.96.0", "to": "110.238.127.255", "label": "Huawei [SG]"},
|
||||
{"from": "111.119.192.0", "to": "111.119.255.255", "label": "Huawei [SG]"},
|
||||
{"from": "110.238.96.0", "to": "110.238.127.254", "label": "Huawei [SG]"},
|
||||
{"from": "111.119.192.0", "to": "111.119.255.254", "label": "Huawei [SG]"},
|
||||
{"from": "119.0.0.0", "to": "119.207.255.254", "label": "Unicom [CN]"},
|
||||
{"from": "121.91.168.0", "to": "121.91.175.254", "label": "Huawei [HK]"},
|
||||
{"from": "122.8.0.0", "to": "122.8.255.254", "label": "CN-ISP [CN]"},
|
||||
@@ -82,14 +82,17 @@
|
||||
{"from": "159.138.0.0", "to": "159.138.225.254", "label": "Huawei [TH]"},
|
||||
{"from": "162.128.0.0", "to": "162.128.255.254", "label": "Zenlayer [SG]"},
|
||||
{"from": "166.108.192.0", "to": "166.108.255.254", "label": "Huawei [SG]"},
|
||||
{"from": "177.0.0.0", "to": "177.255.255.255", "label": "BrasilNET [BR]"},
|
||||
{"from": "177.0.0.0", "to": "177.255.255.254", "label": "BrasilNET [BR]"},
|
||||
{"from": "179.0.0.0", "to": "179.255.255.254", "label": "BrasilNET [BR]"},
|
||||
{"from": "183.87.32.0", "to": "183.87.159.254", "label": "Huawei [HK]"},
|
||||
{"from": "187.180.0.0", "to": "187.183.255.254", "label": "Claro [BR]"},
|
||||
{"from": "188.239.0.0", "to": "188.239.63.254", "label": "Huawei [SG]"},
|
||||
{"from": "189.127.255.255", "to": "189.127.255.255", "label": "Huawei BR/HK"},
|
||||
{"from": "190.92.192.0", "to": "190.92.255.254", "label": "Huawei [HK]"},
|
||||
{"from": "186.0.0.0", "to": "186.255.255.254", "label": "South-American ISPs (186.x)"},
|
||||
{"from": "187.0.0.0", "to": "187.255.255.254", "label": "South-American ISPs (187.x)"},
|
||||
{"from": "188.0.0.0", "to": "188.255.255.254", "label": "South-American ISPs (188.x)"},
|
||||
{"from": "189.0.0.0", "to": "189.255.255.254", "label": "South-American ISPs (189.x)"},
|
||||
{"from": "190.0.0.0", "to": "190.255.255.254", "label": "South-American ISPs (190.x)"},
|
||||
{"from": "192.124.170.0", "to": "192.124.182.254", "label": "Relcom [CZ]"},
|
||||
{"from": "2001:4800::::::", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Rackspace/Google [US]"},
|
||||
{"from": "2001:0ee0::::::", "to": "2001:ee3:ffff:ffff:ffff:ffff:ffff:ffff", "mask": 30, "label": "VNPT [VN]"},
|
||||
{"from": "2600:1f00::::::", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "label": "Amazon Cloud [US]"},
|
||||
{"from": "2804:::::::", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Inspire [BR]"},
|
||||
{"from": "2a0a:4cc0::::::", "to": "2a0a:4cc0:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "label": "Netcup [DE]"}
|
||||
@@ -1,7 +1,7 @@
|
||||
base botmon
|
||||
author Sascha Leib
|
||||
email ad@hominem.com
|
||||
date 2025-09-08
|
||||
date 2025-09-09
|
||||
name Bot Monitoring
|
||||
desc Live monitoring of bot traffic on your DokuWiki instance (under development)
|
||||
url https://www.dokuwiki.org/plugin:botmon
|
||||
|
||||
183
script.js
183
script.js
@@ -541,10 +541,6 @@ BotMon.live = {
|
||||
}
|
||||
});
|
||||
|
||||
// clean up the ip ranges:
|
||||
me._cleanIPRanges();
|
||||
console.log(BotMon.live.data.analytics._ipRange);
|
||||
|
||||
BotMon.live.gui.status.hideBusy('Done.');
|
||||
},
|
||||
|
||||
@@ -562,6 +558,7 @@ BotMon.live = {
|
||||
*/
|
||||
_addToIPRanges: function(ip) {
|
||||
|
||||
// #TODO: handle nestled ranges!
|
||||
const me = BotMon.live.data.analytics;
|
||||
const ipv = (ip.indexOf(':') > 0 ? 6 : 4);
|
||||
|
||||
@@ -592,16 +589,39 @@ BotMon.live = {
|
||||
}
|
||||
|
||||
},
|
||||
_cleanIPRanges: function() {
|
||||
const me = BotMon.live.data.analytics;
|
||||
|
||||
for (const [n, arr] of Object.entries(me._ipRange)) {
|
||||
|
||||
arr.forEach( (it, i) => {
|
||||
if (it.count <= 1) arr.splice(i, 1);
|
||||
});
|
||||
getTopBotIPRanges: function(max) {
|
||||
|
||||
};
|
||||
const me = BotMon.live.data.analytics;
|
||||
|
||||
const kMinHits = 2;
|
||||
|
||||
// combine the ip lists, removing all lower volume branches:
|
||||
let ipTypes = [4,6];
|
||||
const tmpList = [];
|
||||
for (let i=0; i<ipTypes.length; i++) {
|
||||
const ipType = ipTypes[i];
|
||||
(ipType == 6 ? me._ipRange.ip6 : me._ipRange.ip4).forEach( it => {
|
||||
if (it.count > kMinHits) {
|
||||
it.type = ipType;
|
||||
tmpList.push(it);
|
||||
}
|
||||
});
|
||||
tmpList.sort( (a,b) => b.count - a.count);
|
||||
}
|
||||
|
||||
// reduce to only the top (max) items and create the target format:
|
||||
// #TODO: handle nestled ranges!
|
||||
let rList = [];
|
||||
for (let j=0; Math.min(max, tmpList.length) > j; j++) {
|
||||
const rangeInfo = tmpList[j];
|
||||
rList.push({
|
||||
'ip': rangeInfo.seg + ( rangeInfo.type == 4 ? '.x.x.x' : '::x'),
|
||||
'typ': rangeInfo.type,
|
||||
'num': rangeInfo.count
|
||||
});
|
||||
}
|
||||
|
||||
return rList;
|
||||
}
|
||||
},
|
||||
|
||||
@@ -612,7 +632,7 @@ BotMon.live = {
|
||||
|
||||
// Load the list of known bots:
|
||||
BotMon.live.gui.status.showBusy("Loading known bots …");
|
||||
const url = BotMon._baseDir + 'data/known-bots.json';
|
||||
const url = BotMon._baseDir + 'config/known-bots.json';
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
@@ -687,7 +707,7 @@ BotMon.live = {
|
||||
|
||||
// Load the list of known bots:
|
||||
BotMon.live.gui.status.showBusy("Loading known clients");
|
||||
const url = BotMon._baseDir + 'data/known-clients.json';
|
||||
const url = BotMon._baseDir + 'config/known-clients.json';
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
@@ -747,7 +767,7 @@ BotMon.live = {
|
||||
|
||||
// Load the list of known bots:
|
||||
BotMon.live.gui.status.showBusy("Loading known platforms");
|
||||
const url = BotMon._baseDir + 'data/known-platforms.json';
|
||||
const url = BotMon._baseDir + 'config/known-platforms.json';
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
@@ -806,7 +826,7 @@ BotMon.live = {
|
||||
|
||||
// Load the list of known bots:
|
||||
BotMon.live.gui.status.showBusy("Loading list of rules …");
|
||||
const url = BotMon._baseDir + 'data/rules.json';
|
||||
const url = BotMon._baseDir + 'config/rules.json';
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
@@ -1138,52 +1158,105 @@ BotMon.live = {
|
||||
make: function() {
|
||||
|
||||
const data = BotMon.live.data.analytics.data;
|
||||
const parent = document.getElementById('botmon__today__content');
|
||||
|
||||
// shortcut for neater code:
|
||||
const makeElement = BotMon.t._makeElement;
|
||||
|
||||
if (parent) {
|
||||
const botsVsHumans = document.getElementById('botmon__today__botsvshumans');
|
||||
if (botsVsHumans) {
|
||||
botsVsHumans.appendChild(makeElement('dt', {}, "Bots vs. Humans (page views)"));
|
||||
|
||||
const bounceRate = Math.round(data.totalVisits / data.totalPageViews * 100);
|
||||
|
||||
jQuery(parent).prepend(jQuery(`
|
||||
<details id="botmon__today__overview" open>
|
||||
<summary>Overview</summary>
|
||||
<div class="grid-3-columns">
|
||||
<dl>
|
||||
<dt>Web metrics</dt>
|
||||
<dd><span>Total page views:</span><strong>${data.totalPageViews}</strong></dd>
|
||||
<dd><span>Total visitors (est.):</span><span>${data.totalVisits}</span></dd>
|
||||
<dd><span>Bounce rate (est.):</span><span>${bounceRate}%</span></dd>
|
||||
</dl>
|
||||
<dl>
|
||||
<dt>Bots vs. Humans (page views)</dt>
|
||||
<dd><span>Registered users:</span><strong>${data.bots.users}</strong></dd>
|
||||
<dd><span>Probably humans:</span><strong>${data.bots.human}</strong></dd>
|
||||
<dd><span>Suspected bots:</span><strong>${data.bots.suspected}</strong></dd>
|
||||
<dd><span>Known bots:</span><strong>${data.bots.known}</strong></dd>
|
||||
</dl>
|
||||
<dl id="botmon__botslist"></dl>
|
||||
</div>
|
||||
</details>
|
||||
`));
|
||||
|
||||
// update known bots list:
|
||||
const block = document.getElementById('botmon__botslist');
|
||||
block.innerHTML = "<dt>Top known bots (page views)</dt>";
|
||||
|
||||
let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => {
|
||||
return b._pageViews.length - a._pageViews.length;
|
||||
});
|
||||
|
||||
for (let i=0; i < Math.min(bots.length, 4); i++) {
|
||||
for (let i = 3; i >= 0; i--) {
|
||||
const dd = makeElement('dd');
|
||||
dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n));
|
||||
dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length));
|
||||
block.appendChild(dd);
|
||||
let title = '';
|
||||
let value = '';
|
||||
switch(i) {
|
||||
case 0:
|
||||
title = "Registered users:";
|
||||
value = data.bots.users;
|
||||
break;
|
||||
case 1:
|
||||
title = "Probably humans:";
|
||||
value = data.bots.human;
|
||||
break;
|
||||
case 2:
|
||||
title = "Suspected bots:";
|
||||
value = data.bots.suspected;
|
||||
break;
|
||||
case 3:
|
||||
title = "Known bots:";
|
||||
value = data.bots.known;
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unknown list type ${i}.`);
|
||||
}
|
||||
dd.appendChild(makeElement('span', {}, title));
|
||||
dd.appendChild(makeElement('strong', {}, value));
|
||||
botsVsHumans.appendChild(dd);
|
||||
}
|
||||
}
|
||||
|
||||
// update known bots list:
|
||||
const botlist = document.getElementById('botmon__botslist');
|
||||
botlist.innerHTML = "<dt>Top 5 known bots (page views)</dt>";
|
||||
|
||||
let bots = BotMon.live.data.analytics.groups.knownBots.toSorted( (a, b) => {
|
||||
return b._pageViews.length - a._pageViews.length;
|
||||
});
|
||||
|
||||
for (let i=0; i < Math.min(bots.length, 5); i++) {
|
||||
const dd = makeElement('dd');
|
||||
dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n));
|
||||
dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length));
|
||||
botlist.appendChild(dd);
|
||||
}
|
||||
|
||||
// update the suspected bot IP ranges list:
|
||||
const botIps = document.getElementById('botmon__today__botips');
|
||||
if (botIps) {
|
||||
botIps.appendChild(makeElement('dt', {}, "Top 5 suspected bots’ IP ranges"));
|
||||
|
||||
const ipList = BotMon.live.data.analytics.getTopBotIPRanges(5);
|
||||
ipList.forEach( (ipInfo) => {
|
||||
const li = makeElement('dd');
|
||||
li.appendChild(makeElement('span', {'class': 'ip ip' + ipInfo.typ }, ipInfo.ip));
|
||||
li.appendChild(makeElement('span', {'class': 'count' }, ipInfo.num));
|
||||
botIps.append(li)
|
||||
})
|
||||
}
|
||||
|
||||
// update the webmetrics overview:
|
||||
const wmoverview = document.getElementById('botmon__today__wm_overview');
|
||||
if (wmoverview) {
|
||||
const bounceRate = Math.round(data.totalVisits / data.totalPageViews * 100);
|
||||
|
||||
wmoverview.appendChild(makeElement('dt', {}, "Overview"));
|
||||
for (let i = 0; i < 3; i++) {
|
||||
const dd = makeElement('dd');
|
||||
let title = '';
|
||||
let value = '';
|
||||
switch(i) {
|
||||
case 0:
|
||||
title = "Total page views:";
|
||||
value = data.totalPageViews;
|
||||
break;
|
||||
case 1:
|
||||
title = "Total visitors (est.):";
|
||||
value = data.totalVisits;
|
||||
break;
|
||||
case 2:
|
||||
title = "Bounce rate (est.):";
|
||||
value = bounceRate + '%';
|
||||
break;
|
||||
default:
|
||||
console.warn(`Unknown list type ${i}.`);
|
||||
}
|
||||
dd.appendChild(makeElement('span', {}, title));
|
||||
dd.appendChild(makeElement('strong', {}, value));
|
||||
wmoverview.appendChild(dd);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
Reference in New Issue
Block a user