diff --git a/cleanup.php b/cleanup.php
index ea21193..ea97973 100644
--- a/cleanup.php
+++ b/cleanup.php
@@ -1,27 +1,25 @@
-
BotMon Cleanup Script
-BotMon Cleanup Script
+=====================
+File “{$fName}” – ";
- if ($bName == '' || $bName == 'logfiles') {
- echo " ignored";
- } else if ($bName == $today || $bName == $yesterday) {
- echo " skipped";
+ if ($bName == '' || $bName == 'logfiles') {
+ //echo "File “{$fName}” ignored.";
+ } else if ($bName == $today || $bName == $yesterday) {
+ //echo "File “{$fName}” skipped.\n";
+ } else {
+ if (unlink('logs/' . $file)) {
+ echo "File “{$fName}” deleted.\n";
} else {
- if (unlink('logs/' . $file)) {
- echo "deleted.";
- } else {
- echo " not deleted!";
- }
+ echo " File “{$fName}” could not be deleted!\n";
}
}
- ?>
+}
\ No newline at end of file
diff --git a/data/known-bots.json b/data/known-bots.json
index b886df4..2ce7df2 100644
--- a/data/known-bots.json
+++ b/data/known-bots.json
@@ -7,7 +7,7 @@
{"id": "googlebot",
"n": "Google Bot",
"r": ["Googlebot"],
- "rx": ["\\sGooglebot\\/(\\d+\\.\\d+);"],
+ "rx": ["Googlebot\\/(\\d+\\.\\d+)", "Googlebot-Image\\/(\\d+\\.\\d+)"],
"url": "http://www.google.com/bot.html"
},
{"id": "googleads",
diff --git a/data/known-clients.json b/data/known-clients.json
index ef40b61..5663613 100644
--- a/data/known-clients.json
+++ b/data/known-clients.json
@@ -41,7 +41,7 @@
},
{"n": "Edge",
"id": "msedge",
- "rx": [ "\\sEdg\\/(\\d+)\\.", "\\sEdge\\/(\\d+)" ]
+ "rx": [ "\\sEdg\\/(\\d+)", "\\sEdge\\/(\\d+)", "EdgiOS\\/(\\d+)" ]
},
{"n": "Old Chrome",
"id": "chromeold",
diff --git a/data/rules.json b/data/rules.json
index 8ddb9cf..6124bcb 100644
--- a/data/rules.json
+++ b/data/rules.json
@@ -13,16 +13,16 @@
"id": "oldOS", "desc": "Obsolete platform version",
"bot": 40
},
- {"func": "noJavaScript",
- "id": "noJS", "desc": "JavaScript disabled",
- "bot": 40
- },
{"func": "smallPageCount", "params": [1],
"id": "onePage", "desc": "Visiter viewed only a single page",
"bot": 40
},
- {"func": "noTicks",
- "id": "noTicks", "desc": "Visitor did not spend time reading a page",
+ {"func": "noRecord", "params": ["log"],
+ "id": "noClient", "desc": "No client-side JS log was recorded",
+ "bot": 50
+ },
+ {"func": "noRecord", "params": ["tck"],
+ "id": "noTicks", "desc": "No client ticks were recorded",
"bot": 10
},
{"func": "noReferrer",
@@ -48,33 +48,41 @@
{"func": "combTest", "params": [["macos", "msie"], ["win10", "safari"]],
"id": "impPC", "desc": "Impossible combination of platform and client",
"bot": 100
+ },
+ {"func": "loadSpeed", "params": [3, 20],
+ "id": "speedRun", "desc": "Average time between page loads is less than 20 seconds",
+ "bot": 100
}
],
"ipRanges": [
- {"from": "8.127.0.0", "to": "8.223.255.255", "isp": "alibaba", "loc":"cn"},
- {"from": "24.240.0.0", "to": "24.243.255.255", "isp": "charter", "loc":"us"},
- {"from": "27.106.0.0", "to": "27.106.127.255", "isp": "huawei", "loc":"hk"},
- {"from": "45.224.0.0", "to": "45.227.195.255", "isp": "various", "loc": "br"},
- {"from": "46.250.160.0", "to": "46.250.191.255", "isp": "huawei", "loc":"mx"},
- {"from": "94.74.64.0", "to": "94.74.127.255", "isp": "huawei", "loc":"hk"},
- {"from": "101.0.0.0", "to": "119.255.255.255", "isp": "sinnet", "loc":"cn"},
- {"from": "111.119.192.0", "to": "111.119.255.255", "isp": "huawei", "loc":"sg"},
- {"from": "119.0.0.0", "to": "101.207.255.255", "isp": "unicom", "loc":"cn"},
- {"from": "121.91.168.0", "to": "121.91.175.255", "isp": "huawei", "loc":"hk"},
- {"from": "122.8.0.0", "to": "122.8.255.255", "isp": "cnisp", "loc":"cn"},
- {"from": "122.9.0.0", "to": "122.9.255.255", "isp": "huawei", "loc":"cn"},
- {"from": "124.243.128.0", "to": "124.243.191.255", "isp": "huawei", "loc":"sg"},
- {"from": "150.40.128.0", "to": "150.40.255.255", "isp": "huawei", "loc":"hk"},
- {"from": "159.138.0.0", "to": "159.138.225.255", "isp": "huawei", "loc":"th"},
- {"from": "162.128.0.2505", "to": "162.128.127.255", "isp": "zenlayer", "loc":"sg"},
- {"from": "166.108.192.0", "to": "166.108.255.255", "isp": "huawei", "loc":"sg"},
- {"from": "177.0.0.0", "to": "177.223.255.255", "isp": "ths", "loc":"br"},
- {"from": "183.87.32.0", "to": "183.87.159.255", "isp": "huawei", "loc":"hk"},
- {"from": "187.180.0.0", "to": "187.183.255.255", "isp": "claro", "loc":"br"},
- {"from": "188.239.0.0", "to": "188.239.63.255", "isp": "huawei", "loc":"sg"},
- {"from": "189.1.192.0", "to": "189.1.255.255", "isp": "huawei", "loc":"hk"},
- {"from": "189.76.0.0", "to": "189.127.255.255", "isp": "hardonline", "loc":"br"},
- {"from": "190.92.192.0", "to": "190.92.255.255", "isp": "huawei", "loc":"hk"},
- {"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "inspire", "loc":"br"}
+ {"from": "8.127.0.0", "to": "8.223.255.255", "isp": "Alibaba", "loc":"cn"},
+ {"from": "24.240.0.0", "to": "24.243.255.255", "isp": "Charter", "loc":"us"},
+ {"from": "27.106.0.0", "to": "27.106.127.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "34.0.0.0", "to": "34.191.255.255", "isp": "Google LLC", "loc":"zz"},
+ {"from": "45.224.0.0", "to": "45.227.195.255", "isp": "Various BR ISPs", "loc": "br"},
+ {"from": "46.250.160.0", "to": "46.250.191.255", "isp": "Huawei", "loc":"mx"},
+ {"from": "49.0.200.0", "to": "49.0.255.255", "isp": "Huawei", "loc":"sg"},
+ {"from": "94.74.64.0", "to": "94.74.127.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "101.0.0.0", "to": "119.255.255.255", "isp": "SinNet", "loc":"cn"},
+ {"from": "111.119.192.0", "to": "111.119.255.255", "isp": "Huawei", "loc":"sg"},
+ {"from": "119.0.0.0", "to": "101.207.255.255", "isp": "Unicom", "loc":"cn"},
+ {"from": "121.91.168.0", "to": "121.91.175.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "122.8.0.0", "to": "122.8.255.255", "isp": "CN-ISP", "loc":"cn"},
+ {"from": "122.9.0.0", "to": "122.9.255.255", "isp": "Huawei", "loc":"cn"},
+ {"from": "124.243.128.0", "to": "124.243.191.255", "isp": "Huawei", "loc":"sg"},
+ {"from": "150.40.128.0", "to": "150.40.255.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "159.138.0.0", "to": "159.138.225.255", "isp": "Huawei", "loc":"th"},
+ {"from": "162.128.0.0", "to": "162.128.255.255", "isp": "Zenlayer", "loc":"sg"},
+ {"from": "166.108.192.0", "to": "166.108.255.255", "isp": "Huawei", "loc":"sg"},
+ {"from": "177.0.0.0", "to": "177.223.255.255", "isp": "THS", "loc":"br"},
+ {"from": "183.87.32.0", "to": "183.87.159.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "187.180.0.0", "to": "187.183.255.255", "isp": "Claro", "loc":"br"},
+ {"from": "188.239.0.0", "to": "188.239.63.255", "isp": "Huawei", "loc":"sg"},
+ {"from": "189.1.192.0", "to": "189.1.255.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "189.76.0.0", "to": "189.127.255.255", "isp": "Hard Online", "loc":"br"},
+ {"from": "190.92.192.0", "to": "190.92.255.255", "isp": "Huawei", "loc":"hk"},
+ {"from": "2804:0:0:0:0:0:0:0", "to": "2804:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF", "isp": "Inspire", "loc":"br"},
+ {"from": "2600:1f00:0000:0000:0000:0000:0000:0000", "to": "2600:1fff:ffff:ffff:ffff:ffff:ffff:ffff", "isp": "Amazon Cloud", "loc":"us"},
+ {"from": "2001:4800:0000:0000:0000:0000:0000:0000", "to": "2001:4fff:ffff:ffff:ffff:ffff:ffff:ffff", "isp": "Rackspace/Google", "loc":"us"}
]
}
\ No newline at end of file
diff --git a/img/fileview.svg b/img/fileview.svg
new file mode 100644
index 0000000..abac089
--- /dev/null
+++ b/img/fileview.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/plugin.info.txt b/plugin.info.txt
index 1505946..3c60c14 100644
--- a/plugin.info.txt
+++ b/plugin.info.txt
@@ -1,7 +1,7 @@
base botmon
author Sascha Leib
email ad@hominem.com
-date 2025-09-06
+date 2025-09-07
name Bot Monitoring
desc Live monitoring of bot traffic on your DokuWiki instance (under development)
url https://www.dokuwiki.org/plugin:botmon
diff --git a/script.js b/script.js
index e64d12d..a7c7b9c 100644
--- a/script.js
+++ b/script.js
@@ -100,6 +100,40 @@ const BotMon = {
} else { /* IP4 */
return Number(ip.split('.').map(d => ('000'+d).slice(-3) ).join(''));
}
+ },
+
+ /* helper: renders the time-of-day part of a Date as zero-padded "HH:MM:SS" (local time) */
+ /* returns String, or null if no date was passed in */
+ _formatTime: function(date) {
+
+     // nothing to format:
+     if (!date) return null;
+
+     const pad2 = n => ('0' + n).slice(-2); /* left-pads a number to two digits */
+     return [date.getHours(), date.getMinutes(), date.getSeconds()].map(pad2).join(':');
+
+ },
+
+ /* helper function to show the time span between two dates as e.g. "1h 2m 3s" */
+ /* (units that are zero are left out; the order of the two arguments does not matter) */
+ /* returns String, or null if the span is shorter than one full second */
+ _formatTimeDiff: function(dateA, dateB) {
+
+     // absolute difference in milliseconds (Date objects are coerced to timestamps):
+     const ms = Math.abs(dateB - dateA);
+
+     // ignore anything that would not show at least one full second;
+     // written as a negation so that NaN (missing or invalid date) also ends up here:
+     if (!(ms >= 1000)) return null;
+
+     // hours are deliberately not wrapped at 24, so spans longer than a day stay correct:
+     const h = Math.floor(ms / (1000 * 60 * 60));
+     const m = Math.floor(ms / (1000 * 60)) % 60;
+     const s = Math.floor(ms / 1000) % 60;
+
+     const parts = [ [h, 'h'], [m, 'm'], [s, 's'] ].filter(p => p[0] > 0);
+     // join with single blanks so there is no trailing space (e.g. "2m", not "2m "):
+     return parts.map(p => p[0] + p[1]).join(' ');
+ }
}
};
@@ -208,38 +242,40 @@ BotMon.live = {
// shortcut to make code more readable:
const model = BotMon.live.data.model;
+ const timeout = 60 * 60 * 1000; /* session timeout: One hour */
+
// loop over all visitors already registered:
for (let i=0; i nv.ts) {
visitor._firstSeen = nv.ts;
}
}
@@ -306,6 +342,7 @@ BotMon.live = {
prereg._lastSeen = nv.ts;
// increase view count:
prereg._viewCount += 1;
+ prereg._tickCount += 1;
}
// update referrer state:
@@ -336,7 +373,9 @@ BotMon.live = {
}
if (visitor) {
- visitor._lastSeen = dat.ts;
+ if (visitor._lastSeen < dat.ts) {
+ visitor._lastSeen = dat.ts;
+ }
if (!visitor._seenBy.includes(type)) {
visitor._seenBy.push(type);
}
@@ -355,6 +394,7 @@ BotMon.live = {
prereg = model._makePageView(dat, type);
visitor._pageViews.push(prereg);
}
+ prereg._tickCount += 1;
},
// updating visit data from the ticker log:
@@ -380,7 +420,7 @@ BotMon.live = {
// get the page view info:
let pv = model._getPageView(visitor, dat);
if (!pv) {
- console.warn(`No page view for visit ID ${dat.id}, page ${dat.pg}, registering a new one.`);
+ console.warn(`No page view for visit ID “${dat.id}”, page “${dat.pg}”, registering a new one.`);
pv = model._makePageView(dat, type);
visitor._pageViews.push(pv);
}
@@ -451,6 +491,7 @@ BotMon.live = {
// shortcut to make code more readable:
const model = BotMon.live.data.model;
+ const me = BotMon.live.data.analytics;
BotMon.live.gui.status.showBusy("Analysing data …");
@@ -481,6 +522,11 @@ BotMon.live = {
v._eval = e.rules;
v._botVal = e.val;
+ // add each page view to IP range information (unless it is already from a known bot IP range):
+ v._pageViews.forEach( pv => {
+ me._addToIPRanges(pv.ip);
+ });
+
if (e.isBot) { // likely bots
v._type = BM_USERTYPE.LIKELY_BOT;
this.data.bots.suspected += v._pageViews.length;
@@ -496,9 +542,55 @@ BotMon.live = {
});
BotMon.live.gui.status.hideBusy('Done.');
+ console.log(BotMon.live.data.analytics._ipRange);
+
+ },
+
+ // statistics of visits per IP range, used by _addToIPRanges() (which works with entries of the form {seg, count}):
+ _ipRange: {
+ ip4: [], /* list used for IPv4 addresses (address is split at '.') */
+ ip6: [] /* list used for IPv6 addresses (address is split at ':') */
+ },
+ /**
+ * Adds a visit to the IP range statistics.
+ *
+ * This helps to identify IP ranges that are used by bots.
+ *
+ * @param {string} ip The IP address to add.
+ */
+ _addToIPRanges: function(ip) {
+
+ const me = BotMon.live.data.analytics;
+ const ipv = (ip.indexOf(':') > 0 ? 6 : 4);
+
+ const ipArr = ip.split( ipv == 6 ? ':' : '.');
+ const maxSegments = (ipv == 6 ? 4 : 3);
+
+ let arr = (ipv == 6 ? me._ipRange.ip6 : me._ipRange.ip4);
+ let it = null;
+ for (let i = 0; i < Math.min(ipArr.length, maxSegments); i++) {
+ it = arr.find( a => { a.seg == ipArr[i]; } );
+ if (!it) {
+ it = {seg: ipArr[i], count: 1};
+ if (i= minItems) {
+ //console.log('loadSpeed', visitor._pageViews.length, minItems, maxTime);
+
+ const pvArr = visitor._pageViews.map(pv => pv._lastSeen).sort();
+
+ let totalTime = 0;
+ for (let i=1; i < pvArr.length; i++) {
+ totalTime += (pvArr[i] - pvArr[i-1]);
+ }
+
+ //console.log(' ', totalTime , Math.round(totalTime / (pvArr.length * 1000)), (( totalTime / pvArr.length ) <= maxTime * 1000), visitor.ip);
+
+ return (( totalTime / pvArr.length ) <= maxTime * 1000);
+ }
}
},
@@ -1051,7 +1159,7 @@ BotMon.live = {
for (let i=0; i < Math.min(bots.length, 4); i++) {
const dd = makeElement('dd');
- dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id}, bots[i]._bot.n));
+ dd.appendChild(makeElement('span', {'class': 'bot bot_' + bots[i]._bot.id }, bots[i]._bot.n));
dd.appendChild(makeElement('strong', undefined, bots[i]._pageViews.length));
block.appendChild(dd);
}
@@ -1295,7 +1403,7 @@ BotMon.live = {
dl.appendChild(make('dd', {'class': 'has_icon ip' + data.typ}, data.id));
}
- if ((data._lastSeen - data._firstSeen) < 1) {
+ if (Math.abs(data._lastSeen - data._firstSeen) < 100) {
dl.appendChild(make('dt', {}, "Seen:"));
dl.appendChild(make('dd', {'class': 'seen'}, data._firstSeen.toLocaleString()));
} else {
@@ -1339,16 +1447,28 @@ BotMon.live = {
}, "No referer"));
}
pgLi.appendChild(make('span', {}, ( page._seenBy ? page._seenBy.join(', ') : '—') + '; ' + page._tickCount));
- pgLi.appendChild(make('span', {}, page._firstSeen.toLocaleString()));
- pgLi.appendChild(make('span', {}, page._lastSeen.toLocaleString()));
+ pgLi.appendChild(make('span', {}, BotMon.t._formatTime(page._firstSeen)));
+
+ // get the time difference:
+ const tDiff = BotMon.t._formatTimeDiff(page._firstSeen, page._lastSeen);
+ if (tDiff) {
+ pgLi.appendChild(make('span', {'class': 'visit-length', 'title': 'Last seen: ' + page._lastSeen.toLocaleString()}, tDiff));
+ } else {
+ pgLi.appendChild(make('span', {'class': 'bounce'}, "Bounce"));
+ }
+
pageList.appendChild(pgLi);
});
pagesDd.appendChild(pageList);
dl.appendChild(pagesDd);
- /* add bot evaluation: */
+ /* bot evaluation rating */
+ dl.appendChild(make('dt', undefined, "Bot rating:"));
+ dl.appendChild(make('dd', {'class': 'bot-rating'}, data._botVal + '/' + BotMon.live.data.rules._threshold ));
+
+ /* add bot evaluation details: */
if (data._eval) {
- dl.appendChild(make('dt', {}, "Evaluation:"));
+ dl.appendChild(make('dt', {}, "Bot evaluation details:"));
const evalDd = make('dd');
const testList = make('ul',{
'class': 'eval'
@@ -1379,9 +1499,9 @@ BotMon.live = {
const tst2Li = make('li', {
'class': 'total'
});
- tst2Li.appendChild(make('span', {}, "Total:"));
+ /*tst2Li.appendChild(make('span', {}, "Total:"));
tst2Li.appendChild(make('span', {}, data._botVal));
- testList.appendChild(tst2Li);
+ testList.appendChild(tst2Li);*/
evalDd.appendChild(testList);
dl.appendChild(evalDd);
diff --git a/style.less b/style.less
index 86bd864..bbd9d97 100644
--- a/style.less
+++ b/style.less
@@ -193,9 +193,14 @@
white-space: nowrap;
}
dd {
- grid-column: 2;
- display: inline-block;
- background-color: transparent;
+ & {
+ grid-column: 2;
+ display: inline-block;
+ background-color: transparent;
+ }
+ &.bot-rating {
+ text-align: right;
+ }
}
dd.pages {
& {
@@ -207,6 +212,22 @@
justify-content: space-between;
align-items: center;
}
+ span { /* styles for the time-span cells that script.js adds to each page-view list item */
+ &.visit-length { /* formatted duration between first and last sighting of the page */
+ min-width: min-content; /* never shrink below the width of the text */
+ }
+ &.bounce { /* shown instead of a duration when there is no measurable time span */
+ width: 1.25em; height: 1.25em; /* fixed square box, same size as the icon below */
+ overflow: hidden; /* presumably clips the "Bounce" text label so that only the icon shows - TODO confirm */
+ }
+ &.bounce::before { /* the icon itself: an empty pseudo-element carrying a background image */
+ display: inline-block;
+ content: '';
+ width: 1.25em; height: 1.25em;
+ background: transparent url('img/bounce.svg') center no-repeat; /* NOTE(review): confirm that img/bounce.svg exists in the plugin */
+ background-size: 1.25em;
+ }
+ }
}
}
}