More bot criteria

This commit is contained in:
Sascha Leib
2025-09-07 16:11:17 +02:00
parent 451abfad91
commit 446aa816f1
8 changed files with 255 additions and 107 deletions

View File

@@ -1,27 +1,25 @@
<h1>BotMon Cleanup Script</h1>
<ul><?php
<?php header("Content-Type: text/plain"); ?>BotMon Cleanup Script
=====================
<?php
// exclude the following two dates:
$today = gmdate('Y-m-d');
$yesterday = gmdate('Y-m-d', time() - 86400);
// exclude the following two dates:
$today = gmdate('Y-m-d');
$yesterday = gmdate('Y-m-d', time() - 86400);
// scan the log directory and delete all files except for today and yesterday:
$dir = scandir('logs');
foreach($dir as $file) {
$fName = pathinfo($file, PATHINFO_BASENAME);
$bName = strtok($fName, '.');
// scan the log directory and delete all files except for today and yesterday:
$dir = scandir('logs');
foreach($dir as $file) {
$fName = pathinfo($file, PATHINFO_BASENAME);
$bName = strtok($fName, '.');
echo "<li>File “{$fName} ";
if ($bName == '' || $bName == 'logfiles') {
echo " <em>ignored</em></li>";
} else if ($bName == $today || $bName == $yesterday) {
echo " <em>skipped</em></li>";
if ($bName == '' || $bName == 'logfiles') {
//echo "File “{$fName}” ignored.";
} else if ($bName == $today || $bName == $yesterday) {
//echo "File “{$fName}” skipped.\n";
} else {
if (unlink('logs/' . $file)) {
echo "File “{$fName}” deleted.\n";
} else {
if (unlink('logs/' . $file)) {
echo "deleted.</li>";
} else {
echo " <strong>not deleted!</strong></li>";
}
echo " File “{$fName}” could not be deleted!\n";
}
}
?></ul>
}

View File

@@ -7,7 +7,7 @@
{"id": "googlebot",
"n": "Google Bot",
"r": ["Googlebot"],
"rx": ["\\sGooglebot\\/(\\d+\\.\\d+);"],
"rx": ["Googlebot\\/(\\d+\\.\\d+)", "Googlebot-Image\\/(\\d+\\.\\d+)"],
"url": "http://www.google.com/bot.html"
},
{"id": "googleads",

View File

@@ -41,7 +41,7 @@
},
{"n": "Edge",
"id": "msedge",
"rx": [ "\\sEdg\\/(\\d+)\\.", "\\sEdge\\/(\\d+)" ]
"rx": [ "\\sEdg\\/(\\d+)", "\\sEdge\\/(\\d+)", "EdgiOS\\/(\\d+)" ]
},
{"n": "Old Chrome",
"id": "chromeold",

View File

@@ -13,16 +13,16 @@
"id": "oldOS", "desc": "Obsolete platform version",
"bot": 40
},
{"func": "noJavaScript",
"id": "noJS", "desc": "JavaScript disabled",
"bot": 40
},
{"func": "smallPageCount", "params": [1],
"id": "onePage", "desc": "Visitor viewed only a single page",
"bot": 40
},
{"func": "noTicks",
"id": "noTicks", "desc": "Visitor did not spend time reading a page",
{"func": "noRecord", "params": ["log"],
"id": "noClient", "desc": "No client-side JS log was recorded",
"bot": 50
},
{"func": "noRecord", "params": ["tck"],
"id": "noTicks", "desc": "No client ticks were recorded",
"bot": 10
},
{"func": "noReferrer",
@@ -48,33 +48,41 @@
{"func": "combTest", "params": [["macos", "msie"], ["win10", "safari"]],
"id": "impPC", "desc": "Impossible combination of platform and client",
"bot": 100
},
{"func": "loadSpeed", "params": [3, 20],
"id": "speedRun", "desc": "Average time between page loads is less than 20 seconds",
"bot": 100
}
],
"ipRanges": [
{"from": "8.127.0.0", "to": "8.223.255.255", "isp": "alibaba", "loc":"cn"},
{"from": "24.240.0.0", "to": "24.243.255.255", "isp": "charter", "loc":"us"},
{"from": "27.106.0.0", "to": "27.106.127.255", "isp": "huawei", "loc":"hk"},
{"from": "45.224.0.0", "to": "45.227.195.255", "isp": "various", "loc": "br"},
{"from": "46.250.160.0", "to": "46.250.191.255", "isp": "huawei", "loc":"mx"},
{"from": "94.74.64.0", "to": "94.74.127.255", "isp": "huawei", "loc":"hk"},
{"from": "101.0.0.0", "to": "119.255.255.255", "isp": "sinnet", "loc":"cn"},
{"from": "111.119.192.0", "to": "111.119.255.255", "isp": "huawei", "loc":"sg"},
{"from": "119.0.0.0", "to": "101.207.255.255", "isp": "unicom", "loc":"cn"},
{"from": "121.91.168.0", "to": "121.91.175.255", "isp": "huawei", "loc":"hk"},
{"from": "122.8.0.0", "to": "122.8.255.255", "isp": "cnisp", "loc":"cn"},
{"from": "122.9.0.0", "to": "122.9.255.255", "isp": "huawei", "loc":"cn"},
{"from": "124.243.128.0", "to": "124.243.191.255", "isp": "huawei", "loc":"sg"},
{"from": "150.40.128.0", "to": "150.40.255.255", "isp": "huawei", "loc":"hk"},
{"from": "159.138.0.0", "to": "159.138.225.255", "isp": "huawei", "loc":"th"},
{"from": "162.128.0.2505", "to": "162.128.127.255", "isp": "zenlayer", "loc":"sg"},
{"from": "166.108.192.0", "to": "166.108.255.255", "isp": "huawei", "loc":"sg"},
{"from": "177.0.0.0", "to": "177.223.255.255", "isp": "ths", "loc":"br"},
{"from": "183.87.32.0", "to": "183.87.159.255", "isp": "huawei", "loc":"hk"},
{"from": "187.180.0.0", "to": "187.183.255.255", "isp": "claro", "loc":"br"},
{"from": "188.239.0.0", "to": "188.239.63.255", "isp": "huawei", "loc":"sg"},
{"from": "189.1.192.0", "to": "189.1.255.255", "isp": "huawei", "loc":"hk"},
{"from": "189.76.0.0", "to": "189.127.255.255", "isp": "hardonline", "loc":"br"},
{"from": "190.92.192.0", "to": "190.92.255.255", "isp": "huawei", "loc":"hk"},
{"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "inspire", "loc":"br"}
{"from": "8.127.0.0", "to": "8.223.255.255", "isp": "Alibaba", "loc":"cn"},
{"from": "24.240.0.0", "to": "24.243.255.255", "isp": "Charter", "loc":"us"},
{"from": "27.106.0.0", "to": "27.106.127.255", "isp": "Huawei", "loc":"hk"},
{"from": "34.0.0.0", "to": "34.191.255.255", "isp": "Google LLC", "loc":"zz"},
{"from": "45.224.0.0", "to": "45.227.195.255", "isp": "Various BR ISPs", "loc": "br"},
{"from": "46.250.160.0", "to": "46.250.191.255", "isp": "Huawei", "loc":"mx"},
{"from": "49.0.200.0", "to": "49.0.255.255", "isp": "Huawei", "loc":"sg"},
{"from": "94.74.64.0", "to": "94.74.127.255", "isp": "Huawei", "loc":"hk"},
{"from": "101.0.0.0", "to": "119.255.255.255", "isp": "SinNet", "loc":"cn"},
{"from": "111.119.192.0", "to": "111.119.255.255", "isp": "Huawei", "loc":"sg"},
{"from": "119.0.0.0", "to": "101.207.255.255", "isp": "Unicom", "loc":"cn"},
{"from": "121.91.168.0", "to": "121.91.175.255", "isp": "Huawei", "loc":"hk"},
{"from": "122.8.0.0", "to": "122.8.255.255", "isp": "CN-ISP", "loc":"cn"},
{"from": "122.9.0.0", "to": "122.9.255.255", "isp": "Huawei", "loc":"cn"},
{"from": "124.243.128.0", "to": "124.243.191.255", "isp": "Huawei", "loc":"sg"},
{"from": "150.40.128.0", "to": "150.40.255.255", "isp": "Huawei", "loc":"hk"},
{"from": "159.138.0.0", "to": "159.138.225.255", "isp": "Huawei", "loc":"th"},
{"from": "162.128.0.0", "to": "162.128.255.255", "isp": "Zenlayer", "loc":"sg"},
{"from": "166.108.192.0", "to": "166.108.255.255", "isp": "Huawei", "loc":"sg"},
{"from": "177.0.0.0", "to": "177.223.255.255", "isp": "THS", "loc":"br"},
{"from": "183.87.32.0", "to": "183.87.159.255", "isp": "Huawei", "loc":"hk"},
{"from": "187.180.0.0", "to": "187.183.255.255", "isp": "Claro", "loc":"br"},
{"from": "188.239.0.0", "to": "188.239.63.255", "isp": "Huawei", "loc":"sg"},
{"from": "189.1.192.0", "to": "189.1.255.255", "isp": "Huawei", "loc":"hk"},
{"from": "189.76.0.0", "to": "189.127.255.255", "isp": "Hard Online", "loc":"br"},
{"from": "190.92.192.0", "to": "190.92.255.255", "isp": "Huawei", "loc":"hk"},
{"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "Inspire", "loc":"br"},
{"from": "2600:1f00:0000:0000:0000:0000:0000:0000", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "isp": "Amazon Cloud", "loc":"us"},
{"from": "2001:4800:0000:0000:0000:0000:0000:0000", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "isp": "Rackspace/Google", "loc":"us"}
]
}

1
img/fileview.svg Normal file
View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><title>View only</title><path d="M17,18C17.56,18 18,18.44 18,19C18,19.56 17.56,20 17,20C16.44,20 16,19.56 16,19C16,18.44 16.44,18 17,18M17,15C14.27,15 11.94,16.66 11,19C11.94,21.34 14.27,23 17,23C19.73,23 22.06,21.34 23,19C22.06,16.66 19.73,15 17,15M17,21.5A2.5,2.5 0 0,1 14.5,19A2.5,2.5 0 0,1 17,16.5A2.5,2.5 0 0,1 19.5,19A2.5,2.5 0 0,1 17,21.5M9.27,20H6V4H13V9H18V13.07C18.7,13.15 19.36,13.32 20,13.56V8L14,2H6A2,2 0 0,0 4,4V20A2,2 0 0,0 6,22H10.5C10,21.41 9.59,20.73 9.27,20Z" style="fill:#999"/></svg>

After

Width:  |  Height:  |  Size: 565 B

View File

@@ -1,7 +1,7 @@
base botmon
author Sascha Leib
email ad@hominem.com
date 2025-09-06
date 2025-09-07
name Bot Monitoring
desc Live monitoring of bot traffic on your DokuWiki instance (under development)
url https://www.dokuwiki.org/plugin:botmon

216
script.js
View File

@@ -100,6 +100,40 @@ const BotMon = {
} else { /* IP4 */
return Number(ip.split('.').map(d => ('000'+d).slice(-3) ).join(''));
}
},
/* helper function to format a Date object to show only the time. */
/* returns String */
_formatTime: function(date) {
if (date) {
return ('0'+date.getHours()).slice(-2) + ':' + ('0'+date.getMinutes()).slice(-2) + ':' + ('0'+date.getSeconds()).slice(-2);
} else {
return null;
}
},
/* helper function to show a time difference in seconds or minutes */
/* returns String */
_formatTimeDiff: function(dateA, dateB) {
// if the second date is ealier, swap them:
if (dateA > dateB) dateB = [dateA, dateA = dateB][0];
// get the difference in milliseconds:
let ms = dateB - dateA;
if (ms > 50) { /* ignore small time spans */
const h = Math.floor((ms / (1000 * 60 * 60)) % 24);
const m = Math.floor((ms / (1000 * 60)) % 60);
const s = Math.floor((ms / 1000) % 60);
return ( h>0 ? h + 'h ': '') + ( m>0 ? m + 'm ': '') + ( s>0 ? s + 's': '');
}
return null;
}
}
};
@@ -208,38 +242,40 @@ BotMon.live = {
// shortcut to make code more readable:
const model = BotMon.live.data.model;
const timeout = 60 * 60 * 1000; /* session timeout: One hour */
// loop over all visitors already registered:
for (let i=0; i<model._visitors.length; i++) {
const v = model._visitors[i];
if (visitor._type == BM_USERTYPE.KNOWN_BOT) { /* known bots */
if (Math.abs(v._lastSeen - visitor.ts) < timeout) { /* ignore timed out visits */
if (visitor._type == BM_USERTYPE.KNOWN_BOT) { /* known bots */
// bots match when their ID matches:
if (v._bot && v._bot.id == visitor._bot.id) {
return v;
}
} else if (visitor._type == BM_USERTYPE.KNOWN_USER) { /* registered users */
//if (visitor.id == 'fsmoe7lgqb89t92vt4ju8vdl0q') console.log(visitor);
// visitors match when their names match:
if ( v.usr == visitor.usr
&& v.ip == visitor.ip
&& v.agent == visitor.agent) {
return v;
}
} else { /* any other visitor */
if ( v.id == visitor.id) { /* match the pre-defined IDs */
return v;
} else if (v.ip == visitor.ip && v.agent == visitor.agent) {
if (v.typ !== 'ip') {
console.warn(`Visitor ID “${v.id}” not found, using matchin IP + User-Agent instead.`);
// bots match when their ID matches:
if (v._bot && v._bot.id == visitor._bot.id) {
return v;
}
return v;
}
} else if (visitor._type == BM_USERTYPE.KNOWN_USER) { /* registered users */
// visitors match when their names match:
if ( v.usr == visitor.usr
&& v.ip == visitor.ip
&& v.agent == visitor.agent) {
return v;
}
} else { /* any other visitor */
if ( v.id == visitor.id) { /* match the pre-defined IDs */
return v;
} else if (v.ip == visitor.ip && v.agent == visitor.agent) {
if (v.typ !== 'ip') {
console.warn(`Visitor ID “${v.id}” not found, using matchin IP + User-Agent instead.`);
}
return v;
}
}
}
}
return null; // nothing found
@@ -288,7 +324,7 @@ BotMon.live = {
visitor._platform = BotMon.live.data.platforms.match(nv.agent); // platform info
model._visitors.push(visitor);
} else { // update existing
if (visitor._firstSeen < nv.ts) {
if (visitor._firstSeen > nv.ts) {
visitor._firstSeen = nv.ts;
}
}
@@ -306,6 +342,7 @@ BotMon.live = {
prereg._lastSeen = nv.ts;
// increase view count:
prereg._viewCount += 1;
prereg._tickCount += 1;
}
// update referrer state:
@@ -336,7 +373,9 @@ BotMon.live = {
}
if (visitor) {
visitor._lastSeen = dat.ts;
if (visitor._lastSeen < dat.ts) {
visitor._lastSeen = dat.ts;
}
if (!visitor._seenBy.includes(type)) {
visitor._seenBy.push(type);
}
@@ -355,6 +394,7 @@ BotMon.live = {
prereg = model._makePageView(dat, type);
visitor._pageViews.push(prereg);
}
prereg._tickCount += 1;
},
// updating visit data from the ticker log:
@@ -380,7 +420,7 @@ BotMon.live = {
// get the page view info:
let pv = model._getPageView(visitor, dat);
if (!pv) {
console.warn(`No page view for visit ID ${dat.id}, page ${dat.pg}, registering a new one.`);
console.warn(`No page view for visit ID ${dat.id}, page ${dat.pg}, registering a new one.`);
pv = model._makePageView(dat, type);
visitor._pageViews.push(pv);
}
@@ -451,6 +491,7 @@ BotMon.live = {
// shortcut to make code more readable:
const model = BotMon.live.data.model;
const me = BotMon.live.data.analytics;
BotMon.live.gui.status.showBusy("Analysing data …");
@@ -481,6 +522,11 @@ BotMon.live = {
v._eval = e.rules;
v._botVal = e.val;
// add each page view to IP range information (unless it is already from a known bot IP range):
v._pageViews.forEach( pv => {
me._addToIPRanges(pv.ip);
});
if (e.isBot) { // likely bots
v._type = BM_USERTYPE.LIKELY_BOT;
this.data.bots.suspected += v._pageViews.length;
@@ -496,9 +542,55 @@ BotMon.live = {
});
BotMon.live.gui.status.hideBusy('Done.');
console.log(BotMon.live.data.analytics._ipRange);
},
// visits from IP ranges:
_ipRange: {
ip4: [],
ip6: []
},
/**
* Adds a visit to the IP range statistics.
*
* This helps to identify IP ranges that are used by bots.
*
* @param {string} ip The IP address to add.
*/
_addToIPRanges: function(ip) {
const me = BotMon.live.data.analytics;
const ipv = (ip.indexOf(':') > 0 ? 6 : 4);
const ipArr = ip.split( ipv == 6 ? ':' : '.');
const maxSegments = (ipv == 6 ? 4 : 3);
let arr = (ipv == 6 ? me._ipRange.ip6 : me._ipRange.ip4);
let it = null;
for (let i = 0; i < Math.min(ipArr.length, maxSegments); i++) {
it = arr.find( a => { a.seg == ipArr[i]; } );
if (!it) {
it = {seg: ipArr[i], count: 1};
if (i<maxSegments) it.sub = [];
arr.push(it);
} else {
it.count += 1;
}
arr = it.sub;
}
},
_cleanIPRanges: function() {
const me = BotMon.live.data.analytics;
for (let i = 0; i < 1; i++) {
// once for each ip range types:
let arr = me._ipRange.ip4;
if (i=1) arr = me._ipRange.ip6;
}
}
},
bots: {
@@ -551,7 +643,7 @@ BotMon.live = {
};
r = true;
break;
}
};
};
return r;
});
@@ -816,22 +908,15 @@ BotMon.live = {
return platforms.includes(pId);
},
// client does not use JavaScript:
noJavaScript: function(visitor) {
return !(visitor._seenBy.includes('log') || visitor._seenBy.includes('tck'));
},
// are there at lest num pages loaded?
smallPageCount: function(visitor, num) {
return (visitor._pageViews.length <= Number(num));
},
// there are no ticks recorded for a visitor
// There was no entry in a specific log file for this visitor:
// note that this will also trigger the "noJavaScript" rule:
noTicks: function(visitor) {
return !visitor._seenBy.includes('tck');
noRecord: function(visitor, type) {
return !visitor._seenBy.includes(type);
},
// there are no referrers in any of the page visits:
@@ -877,6 +962,10 @@ BotMon.live = {
const ipInfo = BotMon.live.data.rules.getBotIPInfo(visitor.ip);
if (ipInfo) {
visitor._ipInKnownBotRange = true;
}
return (ipInfo !== null);
},
@@ -888,6 +977,25 @@ BotMon.live = {
return visitor.accept.split(',').indexOf(visitor.lang) < 0;
}
return false;
},
// At least x page views were recorded, but they come within less than y seconds
loadSpeed: function(visitor, minItems, maxTime) {
if (visitor._pageViews.length >= minItems) {
//console.log('loadSpeed', visitor._pageViews.length, minItems, maxTime);
const pvArr = visitor._pageViews.map(pv => pv._lastSeen).sort();
let totalTime = 0;
for (let i=1; i < pvArr.length; i++) {
totalTime += (pvArr[i] - pvArr[i-1]);
}
//console.log(' ', totalTime , Math.round(totalTime / (pvArr.length * 1000)), (( totalTime / pvArr.length ) <= maxTime * 1000), visitor.ip);
return (( totalTime / pvArr.length ) <= maxTime * 1000);
}
}
},
@@ -1051,7 +1159,7 @@ BotMon.live = {
for (let i=0; i < Math.min(bots.length, 4); i++) {
const dd = makeElement('dd');
dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id}, bots[i]._bot.n));
dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n));
dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length));
block.appendChild(dd);
}
@@ -1295,7 +1403,7 @@ BotMon.live = {
dl.appendChild(make('dd', {'class': 'has_icon ip' + data.typ}, data.id));
}
if ((data._lastSeen - data._firstSeen) < 1) {
if (Math.abs(data._lastSeen - data._firstSeen) < 100) {
dl.appendChild(make('dt', {}, "Seen:"));
dl.appendChild(make('dd', {'class': 'seen'}, data._firstSeen.toLocaleString()));
} else {
@@ -1339,16 +1447,28 @@ BotMon.live = {
}, "No referer"));
}
pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount));
pgLi.appendChild(make('span', {}, page._firstSeen.toLocaleString()));
pgLi.appendChild(make('span', {}, page._lastSeen.toLocaleString()));
pgLi.appendChild(make('span', {}, BotMon.t._formatTime(page._firstSeen)));
// get the time difference:
const tDiff = BotMon.t._formatTimeDiff(page._firstSeen, page._lastSeen);
if (tDiff) {
pgLi.appendChild(make('span', {'class': 'visit-length', 'title': 'Last seen: ' + page._lastSeen.toLocaleString()}, tDiff));
} else {
pgLi.appendChild(make('span', {'class': 'bounce'}, "Bounce"));
}
pageList.appendChild(pgLi);
});
pagesDd.appendChild(pageList);
dl.appendChild(pagesDd);
/* add bot evaluation: */
/* bot evaluation rating */
dl.appendChild(make('dt', undefined, "Bot rating:"));
dl.appendChild(make('dd', {'class': 'bot-rating'}, data._botVal + '/' + BotMon.live.data.rules._threshold ));
/* add bot evaluation details: */
if (data._eval) {
dl.appendChild(make('dt', {}, "Evaluation:"));
dl.appendChild(make('dt', {}, "Bot evaluation details:"));
const evalDd = make('dd');
const testList = make('ul',{
'class': 'eval'
@@ -1379,9 +1499,9 @@ BotMon.live = {
const tst2Li = make('li', {
'class': 'total'
});
tst2Li.appendChild(make('span', {}, "Total:"));
/*tst2Li.appendChild(make('span', {}, "Total:"));
tst2Li.appendChild(make('span', {}, data._botVal));
testList.appendChild(tst2Li);
testList.appendChild(tst2Li);*/
evalDd.appendChild(testList);
dl.appendChild(evalDd);

View File

@@ -193,9 +193,14 @@
white-space: nowrap;
}
dd {
grid-column: 2;
display: inline-block;
background-color: transparent;
& {
grid-column: 2;
display: inline-block;
background-color: transparent;
}
&.bot-rating {
text-align: right;
}
}
dd.pages {
& {
@@ -207,6 +212,22 @@
justify-content: space-between;
align-items: center;
}
span {
&.visit-length {
min-width: min-content;
}
&.bounce {
width: 1.25em; height: 1.25em;
overflow: hidden;
}
&.bounce::before {
display: inline-block;
content: '';
width: 1.25em; height: 1.25em;
background: transparent url('img/bounce.svg') center no-repeat;
background-size: 1.25em;
}
}
}
}
}