Improved Bot detection
This commit is contained in:
@@ -25,7 +25,7 @@
|
||||
},
|
||||
{"n": "MacOS",
|
||||
"id": "macos",
|
||||
"rx": [ "\\sMac OS X 10[\\._](1[4-5])[\\._;\\s\\)]", "\\sMac OS X (2[6-9])[\\._;\\s\\)]" ]
|
||||
"rx": [ "\\sMac OS X 10[\\._](1[4-5])[\\._;\\s\\)]", "\\sMac OS X (1[4-5])[\\._;\\s\\)]", "\\sMac OS X (2[6-9])[\\._;\\s\\)]" ]
|
||||
},
|
||||
{"n": "Vintage Windows",
|
||||
"id": "winold",
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
{
|
||||
"threshold": 100,
|
||||
"rules": [
|
||||
{"func": "fromKnownBotIP",
|
||||
"id": "botIpRange", "desc": "Common Bot IP range",
|
||||
"bot": 50
|
||||
},
|
||||
{"func": "obsoleteClient", "params": ["aol", "msie", "chromeold"],
|
||||
"id": "oldClient", "desc": "Obsolete browser version",
|
||||
"bot": 40
|
||||
@@ -14,16 +18,58 @@
|
||||
"bot": 40
|
||||
},
|
||||
{"func": "smallPageCount", "params": [1],
|
||||
"id": "onePage", "desc": "Visiter loaded only a single page",
|
||||
"id": "onePage", "desc": "Visiter viewed only a single page",
|
||||
"bot": 40
|
||||
},
|
||||
{"func": "noTicks",
|
||||
"id": "noTicks", "desc": "Visitor did not spend time reading a page",
|
||||
"bot": 10
|
||||
},
|
||||
{"func": "noReferences",
|
||||
"id": "noRefs", "desc": "No reference field",
|
||||
{"func": "noReferrer",
|
||||
"id": "noRefs", "desc": "No referer field",
|
||||
"bot": 30
|
||||
},
|
||||
{"func": "clientTest", "params": ["brave"],
|
||||
"id": "susClient", "desc": "Client identifier that is popular with bot networks",
|
||||
"bot": 10
|
||||
},
|
||||
{"func": "combTest", "params": [["macos", "chrome"]],
|
||||
"id": "unusualPC", "desc": "Unusual combination of platform and client",
|
||||
"bot": 10
|
||||
},
|
||||
{"func": "combTest", "params": [["macos", "chromeold"],["macosold", "brave"],["winold", "edge"],["winold", "brave"]],
|
||||
"id": "suspPC", "desc": "Suspicious combination of platform and client",
|
||||
"bot": 30
|
||||
},
|
||||
{"func": "combTest", "params": [["macos", "msie"], ["win10", "safari"]],
|
||||
"id": "impPC", "desc": "Impossible combination of platform and client",
|
||||
"bot": 100
|
||||
}
|
||||
],
|
||||
"ipRanges": [
|
||||
{"from": "8.127.0.0", "to": "8.223.255.255", "isp": "alibaba", "loc":"cn"},
|
||||
{"from": "24.240.0.0", "to": "24.243.255.255", "isp": "charter", "loc":"us"},
|
||||
{"from": "27.106.0.0", "to": "27.106.127.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "45.224.0.0", "to": "45.227.195.255", "isp": "various", "loc": "br"},
|
||||
{"from": "46.250.160.0", "to": "46.250.191.255", "isp": "huawei", "loc":"mx"},
|
||||
{"from": "94.74.64.0", "to": "94.74.127.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "101.0.0.0", "to": "119.255.255.255", "isp": "sinnet", "loc":"cn"},
|
||||
{"from": "111.119.192.0", "to": "111.119.255.255", "isp": "huawei", "loc":"sg"},
|
||||
{"from": "119.0.0.0", "to": "101.207.255.255", "isp": "unicom", "loc":"cn"},
|
||||
{"from": "121.91.168.0", "to": "121.91.175.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "122.8.0.0", "to": "122.8.255.255", "isp": "cnisp", "loc":"cn"},
|
||||
{"from": "122.9.0.0", "to": "122.9.255.255", "isp": "huawei", "loc":"cn"},
|
||||
{"from": "124.243.128.0", "to": "124.243.191.255", "isp": "huawei", "loc":"sg"},
|
||||
{"from": "150.40.128.0", "to": "150.40.255.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "159.138.0.0", "to": "159.138.225.255", "isp": "huawei", "loc":"th"},
|
||||
{"from": "166.108.192.0", "to": "166.108.255.255", "isp": "huawei", "loc":"sg"},
|
||||
{"from": "177.0.0.0", "to": "177.223.255.255", "isp": "ths", "loc":"br"},
|
||||
{"from": "183.87.32.0", "to": "183.87.159.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "187.180.0.0", "to": "187.183.255.255", "isp": "claro", "loc":"br"},
|
||||
{"from": "188.239.0.0", "to": "188.239.63.255", "isp": "huawei", "loc":"sg"},
|
||||
{"from": "189.1.192.0", "to": "189.1.255.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "189.76.0.0", "to": "189.127.255.255", "isp": "hardonline", "loc":"br"},
|
||||
{"from": "190.92.192.0", "to": "190.92.255.255", "isp": "huawei", "loc":"hk"},
|
||||
{"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "inspire", "loc":"br"}
|
||||
]
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
base botmon
|
||||
author Sascha Leib
|
||||
email ad@hominem.com
|
||||
date 2025-09-05
|
||||
date 2025-09-06
|
||||
name Bot Monitoring
|
||||
desc Live monitoring of bot traffic on your DokuWiki instance (under development)
|
||||
url https://www.dokuwiki.org/plugin:botmon
|
||||
|
||||
192
script.js
192
script.js
@@ -1,7 +1,7 @@
|
||||
"use strict";
|
||||
/* DokuWiki BotMon Plugin Script file */
|
||||
/* 04.09.2025 - 0.1.8 - pre-release */
|
||||
/* Authors: Sascha Leib <ad@hominem.info> */
|
||||
/* 06.09.2025 - 0.2.0 - beta */
|
||||
/* Author: Sascha Leib <ad@hominem.info> */
|
||||
|
||||
// enumeration of user types:
|
||||
const BM_USERTYPE = Object.freeze({
|
||||
@@ -39,7 +39,6 @@ const BotMon = {
|
||||
|
||||
/* internal tools */
|
||||
t: {
|
||||
|
||||
/* helper function to call inits of sub-objects */
|
||||
_callInit: function(obj) {
|
||||
//console.info('BotMon.t._callInit(obj=',obj,')');
|
||||
@@ -92,6 +91,15 @@ const BotMon = {
|
||||
console.error(e);
|
||||
}
|
||||
return r;
|
||||
},
|
||||
|
||||
/* helper to convert an ip address string to a normalised format: */
|
||||
_ip2Num: function(ip) {
|
||||
if (ip.indexOf(':') > 0) { /* IP6 */
|
||||
return (ip.split(':').map(d => ('0000'+d).slice(-4) ).join(''));
|
||||
} else { /* IP4 */
|
||||
return Number(ip.split('.').map(d => ('000'+d).slice(-3) ).join(''));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -126,6 +134,9 @@ BotMon.live = {
|
||||
|
||||
// set the flags:
|
||||
switch(file) {
|
||||
case 'rules':
|
||||
data._dispatchRulesLoaded = true;
|
||||
break;
|
||||
case 'bots':
|
||||
data._dispatchBotsLoaded = true;
|
||||
break;
|
||||
@@ -135,9 +146,6 @@ BotMon.live = {
|
||||
case 'platforms':
|
||||
data._dispatchPlatformsLoaded = true;
|
||||
break;
|
||||
case 'rules':
|
||||
data._dispatchRulesLoaded = true;
|
||||
break;
|
||||
default:
|
||||
// ignore
|
||||
}
|
||||
@@ -226,7 +234,7 @@ BotMon.live = {
|
||||
if ( v.id == visitor.id) { /* match the pre-defined IDs */
|
||||
return v;
|
||||
} else if (v.ip == visitor.ip && v.agent == visitor.agent) {
|
||||
console.warn("Visitor ID not found, using matchin IP + User-Agent instead.");
|
||||
console.warn(`Visitor ID “${v.id}” not found, using matchin IP + User-Agent instead.`);
|
||||
return v;
|
||||
}
|
||||
|
||||
@@ -353,7 +361,7 @@ BotMon.live = {
|
||||
// find the visit info:
|
||||
let visitor = model.findVisitor(dat);
|
||||
if (!visitor) {
|
||||
console.warn(`No visitor with ID ${dat.id}, registering a new one.`);
|
||||
console.info(`No visitor with ID “${dat.id}” found, registering as a new one.`);
|
||||
visitor = model.registerVisit(dat, type);
|
||||
}
|
||||
if (visitor) {
|
||||
@@ -379,11 +387,20 @@ BotMon.live = {
|
||||
|
||||
// helper function to create a new "page view" item:
|
||||
_makePageView: function(data, type) {
|
||||
|
||||
// try to parse the referrer:
|
||||
let rUrl = null;
|
||||
try {
|
||||
rUrl = ( data.ref && data.ref !== '' ? new URL(data.ref) : null );
|
||||
} catch (e) {
|
||||
console.info(`Invalid referer: “${data.ref}”.`);
|
||||
}
|
||||
|
||||
return {
|
||||
_by: type,
|
||||
ip: data.ip,
|
||||
pg: data.pg,
|
||||
ref: data.ref || '',
|
||||
_ref: rUrl,
|
||||
_firstSeen: data.ts,
|
||||
_lastSeen: data.ts,
|
||||
_seenBy: [type],
|
||||
@@ -690,6 +707,22 @@ BotMon.live = {
|
||||
this._threshold = json.threshold;
|
||||
}
|
||||
|
||||
if (json.ipRanges) {
|
||||
// clean up the IPs first:
|
||||
let list = [];
|
||||
json.ipRanges.forEach( it => {
|
||||
let item = {
|
||||
'from': BotMon.t._ip2Num(it.from),
|
||||
'to': BotMon.t._ip2Num(it.to),
|
||||
'isp': it.isp,
|
||||
'loc': it.loc
|
||||
};
|
||||
list.push(item);
|
||||
});
|
||||
|
||||
this._botIPs = list;
|
||||
}
|
||||
|
||||
this._ready = true;
|
||||
|
||||
} catch (error) {
|
||||
@@ -773,7 +806,9 @@ BotMon.live = {
|
||||
|
||||
// client does not use JavaScript:
|
||||
noJavaScript: function(visitor) {
|
||||
return (visitor._jsClient === false);
|
||||
|
||||
return !(visitor._seenBy.includes('log') || visitor._seenBy.includes('tck'));
|
||||
|
||||
},
|
||||
|
||||
// are there at least num pages loaded?
|
||||
@@ -787,10 +822,75 @@ BotMon.live = {
|
||||
return !visitor._seenBy.includes('tck');
|
||||
},
|
||||
|
||||
// there are no references in any of the page visits:
|
||||
noReferences: function(visitor) {
|
||||
return (visitor._hasReferrer === true);
|
||||
// there are no referrers in any of the page visits:
|
||||
noReferrer: function(visitor) {
|
||||
|
||||
let r = false; // return value
|
||||
for (let i = 0; i < visitor._pageViews.length; i++) {
|
||||
if (!visitor._pageViews[i]._ref) {
|
||||
r = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
},
|
||||
|
||||
// test for specific client identifiers:
|
||||
clientTest: function(visitor, ...list) {
|
||||
|
||||
for (let i=0; i<list.length; i++) {
|
||||
if (visitor._client.id == list[i]) {
|
||||
return true
|
||||
}
|
||||
};
|
||||
return false;
|
||||
},
|
||||
|
||||
// unusual combinations of PLatform and Client:
|
||||
combTest: function(visitor, ...combinations) {
|
||||
|
||||
for (let i=0; i<combinations.length; i++) {
|
||||
|
||||
if (visitor._platform.id == combinations[i][0]
|
||||
&& visitor._client.id == combinations[i][1]) {
|
||||
return true
|
||||
}
|
||||
};
|
||||
|
||||
return false;
|
||||
},
|
||||
|
||||
// is the IP address from a known bot network?
|
||||
fromKnownBotIP: function(visitor) {
|
||||
|
||||
const ipInfo = BotMon.live.data.rules.getBotIPInfo(visitor.ip);
|
||||
|
||||
return (ipInfo !== null);
|
||||
}
|
||||
},
|
||||
|
||||
/* known bot IP ranges: */
|
||||
_botIPs: [],
|
||||
|
||||
// return information on a bot IP range:
|
||||
getBotIPInfo: function(ip) {
|
||||
|
||||
// shortcut to make code more readable:
|
||||
const me = BotMon.live.data.rules;
|
||||
|
||||
// convert IP address to easier comparable form:
|
||||
const ipNum = BotMon.t._ip2Num(ip);
|
||||
|
||||
for (let i=0; i < me._botIPs.length; i++) {
|
||||
const ipRange = me._botIPs[i];
|
||||
|
||||
if (ipNum >= ipRange.from && ipNum <= ipRange.to) {
|
||||
return ipRange;
|
||||
}
|
||||
|
||||
};
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
@@ -1066,6 +1166,8 @@ BotMon.live = {
|
||||
const make = BotMon.t._makeElement;
|
||||
|
||||
let ipType = ( data.ip.indexOf(':') >= 0 ? '6' : '4' );
|
||||
const platformName = (data._platform ? data._platform.n : 'Unknown');
|
||||
const clientName = (data._client ? data._client.n: 'Unknown');
|
||||
|
||||
const li = make('li'); // root list item
|
||||
const details = make('details');
|
||||
@@ -1073,10 +1175,6 @@ BotMon.live = {
|
||||
details.appendChild(summary);
|
||||
|
||||
const span1 = make('span'); /* left-hand group */
|
||||
|
||||
const platformName = (data._platform ? data._platform.n : 'Unknown');
|
||||
const clientName = (data._client ? data._client.n: 'Unknown');
|
||||
|
||||
if (data._type == BM_USERTYPE.KNOWN_BOT) { /* Bot only */
|
||||
|
||||
const botName = ( data._bot && data._bot.n ? data._bot.n : "Unknown");
|
||||
@@ -1123,7 +1221,22 @@ BotMon.live = {
|
||||
|
||||
summary.appendChild(span2);
|
||||
|
||||
// create expanable section:
|
||||
// add details expandable section:
|
||||
details.appendChild(BotMon.live.gui.lists._makeVisitorDetails(data, type));
|
||||
|
||||
li.appendChild(details);
|
||||
return li;
|
||||
},
|
||||
|
||||
_makeVisitorDetails: function(data, type) {
|
||||
|
||||
// shortcut for neater code:
|
||||
const make = BotMon.t._makeElement;
|
||||
|
||||
let ipType = ( data.ip.indexOf(':') >= 0 ? '6' : '4' );
|
||||
if (data.ip == '127.0.0.1' || data.ip == '::1' ) ipType = '0';
|
||||
const platformName = (data._platform ? data._platform.n : 'Unknown');
|
||||
const clientName = (data._client ? data._client.n: 'Unknown');
|
||||
|
||||
const dl = make('dl', {'class': 'visitor_details'});
|
||||
|
||||
@@ -1171,7 +1284,7 @@ BotMon.live = {
|
||||
}
|
||||
|
||||
dl.appendChild(make('dt', {}, "User-Agent:"));
|
||||
dl.appendChild(make('dd', {'class': 'agent' + ipType}, data.agent));
|
||||
dl.appendChild(make('dd', {'class': 'agent'}, data.agent));
|
||||
|
||||
dl.appendChild(make('dt', {}, "Visitor Type:"));
|
||||
dl.appendChild(make('dd', undefined, data._type ));
|
||||
@@ -1182,6 +1295,8 @@ BotMon.live = {
|
||||
dl.appendChild(make('dt', {}, "Visited pages:"));
|
||||
const pagesDd = make('dd', {'class': 'pages'});
|
||||
const pageList = make('ul');
|
||||
|
||||
/* list all page views */
|
||||
data._pageViews.forEach( (page) => {
|
||||
const pgLi = make('li');
|
||||
|
||||
@@ -1191,10 +1306,16 @@ BotMon.live = {
|
||||
visitTimeStr = Math.floor(visitDuration / 1000) + "s";
|
||||
}
|
||||
|
||||
console.log(page);
|
||||
|
||||
pgLi.appendChild(make('span', {}, page.pg));
|
||||
// pgLi.appendChild(make('span', {}, page.ref));
|
||||
pgLi.appendChild(make('span', {}, page.pg)); /* DW Page ID */
|
||||
if (page._ref) {
|
||||
pgLi.appendChild(make('span', {
|
||||
'data-ref': page._ref.host,
|
||||
'title': "Referrer: " + page._ref.full
|
||||
}, page._ref.site));
|
||||
} else {
|
||||
pgLi.appendChild(make('span', {
|
||||
}, "No referer"));
|
||||
}
|
||||
pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount));
|
||||
pgLi.appendChild(make('span', {}, page._firstSeen.toLocaleString()));
|
||||
pgLi.appendChild(make('span', {}, page._lastSeen.toLocaleString()));
|
||||
@@ -1203,25 +1324,36 @@ BotMon.live = {
|
||||
pagesDd.appendChild(pageList);
|
||||
dl.appendChild(pagesDd);
|
||||
|
||||
/* add bot evaluation: */
|
||||
if (data._eval) {
|
||||
dl.appendChild(make('dt', {}, "Evaluation:"));
|
||||
const evalDd = make('dd');
|
||||
const testList = make('ul',{
|
||||
'class': 'eval'
|
||||
});
|
||||
data._eval.forEach( (test) => {
|
||||
data._eval.forEach( test => {
|
||||
|
||||
const tObj = BotMon.live.data.rules.getRuleInfo(test);
|
||||
const tDesc = tObj ? tObj.desc : test;
|
||||
let tDesc = tObj ? tObj.desc : test;
|
||||
|
||||
// special case for Bot IP range test:
|
||||
if (tObj.func == 'fromKnownBotIP') {
|
||||
const rangeInfo = BotMon.live.data.rules.getBotIPInfo(data.ip);
|
||||
if (rangeInfo) {
|
||||
tDesc += ` (${rangeInfo.isp}, ${rangeInfo.loc.toUpperCase()})`;
|
||||
}
|
||||
}
|
||||
|
||||
// create the entry field
|
||||
const tstLi = make('li');
|
||||
tstLi.appendChild(make('span', {
|
||||
'class': 'test test_' . test
|
||||
}, ( tObj ? tObj.desc : test )));
|
||||
'data-testid': test
|
||||
}, tDesc));
|
||||
tstLi.appendChild(make('span', {}, ( tObj ? tObj.bot : '—') ));
|
||||
testList.appendChild(tstLi);
|
||||
});
|
||||
|
||||
// add total row
|
||||
const tst2Li = make('li', {
|
||||
'class': 'total'
|
||||
});
|
||||
@@ -1232,11 +1364,7 @@ BotMon.live = {
|
||||
evalDd.appendChild(testList);
|
||||
dl.appendChild(evalDd);
|
||||
}
|
||||
|
||||
details.appendChild(dl);
|
||||
|
||||
li.appendChild(details);
|
||||
return li;
|
||||
return dl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -399,7 +399,7 @@
|
||||
|
||||
ul.eval {
|
||||
li:nth-child(odd) {
|
||||
background-color: #202022;
|
||||
background-color: #333337;
|
||||
}
|
||||
li.total {
|
||||
border-top-color: #CCC;
|
||||
|
||||
Reference in New Issue
Block a user