diff --git a/.gitignore b/.gitignore index cbf6a5e..a377e1b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ logs/*.log.txt logs/*.srv.txt logs/*.tck.txt -config/user-config.json +config/user-*.json php_errors.log diff --git a/README.md b/README.md index 547d435..5c9c822 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ # DokuWiki Bot Monitoring Plugin Plugin for live-monitoring your DokuWiki instance for bot activity -IMPORTANT: This is an experimental plugin to investigate bot traffic. This is not a "install and forget" software, but rather requires that you actively look at and manage the log files. +IMPORTANT: This is an experimental plugin to investigate bot traffic. This is not "install and forget" software, but rather requires that you actively look at and manage the data for this to be useful. -This plugin creates various log files in its own "logs" directory. These files can get quite large, and you should check them actively. +In addition to collecting a lot of information about bot activity on your server, it now also has a simple Captcha function that you can use to block bots from downloading your precious content. It is, however, advisable to only activate this after you already have a better understanding of your own site's traffic patterns (both by bots and by humans) to avoid over-blocking legitimate users. -Also, these files can get quite large and fill up your server. Make sure to manually delete older files from time to time! - -For more information, please see the DokuWiki Plugin page at: https://www.dokuwiki.org/plugin:botmon +For more information, please see the DokuWiki Plugin page at: https://www.dokuwiki.org/plugin:botmon and the documentation found at: https://leib.be/sascha/projects/dokuwiki/botmon/index diff --git a/action.php b/action.php index ea97afa..c4cd017 100644 --- a/action.php +++ b/action.php @@ -251,7 +251,7 @@ class action_plugin_botmon extends DokuWiki_Action_Plugin { echo DOKU_TAB . DOKU_TAB . 
"cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL; echo DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL; echo DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL; - echo DOKU_TAB . "});"; + echo DOKU_TAB . "});" . NL; // add the translated strings for the captcha: echo DOKU_TAB . '$BMLocales = {' . NL; diff --git a/admin.css b/admin.css index af620f0..cd22926 100644 --- a/admin.css +++ b/admin.css @@ -653,6 +653,7 @@ & { display: grid; grid-template-columns: min-content auto; + gap: .25em .5em; border-left: transparent none 0; margin: 0 .5rem .25rem 0; } diff --git a/admin.js b/admin.js index 751eb9d..d49d22a 100644 --- a/admin.js +++ b/admin.js @@ -161,6 +161,7 @@ const BotMon = { var bg = b; var sm = a; + if (a == 0 || b == 0) return '—'; if (a > b) { var bg = a; var sm = b; @@ -291,7 +292,8 @@ BotMon.live = { // shortcut to make code more readable: const model = BotMon.live.data.model; - //const timeout = 60 * 60 * 1000; // session timeout: One hour + // combine Bot networks to one visitor? + const combineNets = (BMSettings.hasOwnProperty('combineNets') ? 
BMSettings['combineNets'] : true);; if (visitor._type == BM_USERTYPE.KNOWN_BOT) { // known bots match by their bot ID: @@ -304,6 +306,23 @@ BotMon.live = { } } + } else if (combineNets && visitor.hasOwnProperty('_ipRange')) { // combine with other visits from the same range + + let nonRangeVisitor = null; + + for (let i=0; i { - // count visits and page views: - this.data.totalVisits += 1; - this.data.totalPageViews += v._viewCount; - + const captchaStr = v._captcha._str(); + + // count total visits and page views: + data.visits.total += 1; + data.loads.total += v._loadCount; + data.views.total += v._viewCount; + // check for typical bot aspects: let botScore = 0; if (v._type == BM_USERTYPE.KNOWN_BOT) { // known bots - this.data.bots.known += v._viewCount; + data.visits.bots += 1; + data.views.bots += v._viewCount; this.groups.knownBots.push(v); + // captcha counter + if (captchaStr == 'Y') { + data.captcha.bots_blocked += 1; + } else if (captchaStr == 'YN') { + data.captcha.bots_passed += 1; + } else if (captchaStr == 'W') { + data.captcha.bots_whitelisted += 1; + } + } else if (v._type == BM_USERTYPE.KNOWN_USER) { // known users */ - this.data.bots.users += v._viewCount; + data.visits.users += 1; + data.views.users += v._viewCount; this.groups.users.push(v); } else { @@ -627,30 +690,54 @@ BotMon.live = { v._botVal = e.val; if (e.isBot) { // likely bots + v._type = BM_USERTYPE.LIKELY_BOT; - this.data.bots.suspected += v._viewCount; + data.visits.suspected += 1; + data.views.suspected += v._viewCount; this.groups.suspectedBots.push(v); + + // captcha counter + if (captchaStr == 'Y') { + data.captcha.sus_blocked += 1; + } else if (captchaStr == 'YN') { + data.captcha.sus_passed += 1; + } else if (captchaStr == 'W') { + data.captcha.sus_whitelisted += 1; + } + } else { // probably humans + v._type = BM_USERTYPE.PROBABLY_HUMAN; - this.data.bots.human += v._viewCount; + data.visits.humans += 1; + data.views.humans += v._viewCount; + this.groups.humans.push(v); + + // 
captcha counter + if (captchaStr == 'Y') { + data.captcha.humans_blocked += 1; + } else if (captchaStr == 'YN') { + data.captcha.humans_passed += 1; + } } } // perform actions depending on the visitor type: if (v._type == BM_USERTYPE.KNOWN_BOT ) { /* known bots only */ + // no specific actions here. + } else if (v._type == BM_USERTYPE.LIKELY_BOT) { /* probable bots only */ // add bot views to IP range information: me.addToIpRanges(v); - } else { /* humans only */ + } else { /* registered users and probable humans */ // add browser and platform statistics: me.addBrowserPlatform(v); - // add + // add to referrer and pages lists: v._pageViews.forEach( pv => { me.addToRefererList(pv._ref); me.addToPagesList(pv.pg); @@ -1064,7 +1151,6 @@ BotMon.live = { } else { // no known IP range, let's collect necessary information: - // collect basic IP address info: if (ipType == BM_IPVERSION.IPv6) { ipSeg = ipAddr.split(':'); @@ -1604,14 +1690,7 @@ BotMon.live = { fromKnownBotIP: function(visitor) { //console.info('fromKnownBotIP()', visitor.ip); - const ipInfo = BotMon.live.data.ipRanges.match(visitor.ip); - - if (ipInfo) { - visitor._ipInKnownBotRange = true; - visitor._ipRange = ipInfo; - } - - return (ipInfo !== null); + return visitor.hasOwnProperty('_ipRange'); }, // is the page language mentioned in the client's accepted languages? @@ -1910,47 +1989,48 @@ BotMon.live = { */ make: function() { - const data = BotMon.live.data.analytics.data; - const maxItemsPerList = 5; // how many list items to show? 
+ const useCaptcha = BMSettings.useCaptcha || false; const kNoData = '–'; // shown when data is missing + const kSeparator = ' / '; - // shortcut for neater code: + // shortcuts for neater code: const makeElement = BotMon.t._makeElement; + const data = BotMon.live.data.analytics.data; const botsVsHumans = document.getElementById('botmon__today__botsvshumans'); if (botsVsHumans) { - botsVsHumans.appendChild(makeElement('dt', {}, "Page views")); + botsVsHumans.appendChild(makeElement('dt', {}, "Bot statistics")); - for (let i = 0; i <= 5; i++) { + for (let i = 0; i <= ( useCaptcha ? 5 : 3 ); i++) { const dd = makeElement('dd'); let title = ''; let value = ''; switch(i) { case 0: - title = "Known bots:"; - value = data.bots.known || kNoData; + title = "Total (loads / views / visits):"; + value = (data.loads.total || kNoData) + kSeparator + (data.views.total || kNoData) + kSeparator + (data.visits.total || kNoData); break; case 1: - title = "Suspected bots:"; - value = data.bots.suspected || kNoData; + title = "Known bots (views / visits):"; + value = (data.views.bots || kNoData) + kSeparator + (data.visits.bots || kNoData); break; case 2: - title = "Probably humans:"; - value = data.bots.human || kNoData; + title = "Suspected bots (views / visits):"; + value = (data.visits.suspected || kNoData) + kSeparator + (data.views.suspected || kNoData) break; case 3: - title = "Registered users:"; - value = data.bots.users || kNoData; + title = "Bots-humans ratio (views / visits):"; + value = BotMon.t._getRatio(data.views.suspected + data.views.bots, data.views.users + data.views.humans, 100) + kSeparator + BotMon.t._getRatio(data.visits.suspected + data.visits.bots, data.visits.users + data.visits.humans, 100); break; case 4: - title = "Total:"; - value = data.totalPageViews || kNoData; + title = "Known bots blocked / passed / whitelisted:"; + value = data.captcha.bots_blocked + kSeparator + data.captcha.bots_passed + kSeparator + data.captcha.bots_whitelisted; break; case 5: 
- title = "Bots-humans ratio:"; - value = BotMon.t._getRatio(data.bots.suspected + data.bots.known, data.bots.users + data.bots.human, 100); + title = "Suspected bots blocked / passed / whitelisted:"; + value = data.captcha.sus_blocked + kSeparator + data.captcha.sus_passed + kSeparator + data.captcha.sus_whitelisted; break; default: console.warn(`Unknown list type ${i}.`); @@ -2007,7 +2087,7 @@ BotMon.live = { const wmoverview = document.getElementById('botmon__today__wm_overview'); if (wmoverview) { - const humanVisits = BotMon.live.data.analytics.groups.users.length + BotMon.live.data.analytics.groups.humans.length; + const humanVisits = data.views.total; const bounceRate = Math.round(100 * (BotMon.live.data.analytics.getBounceCount('users') + BotMon.live.data.analytics.getBounceCount('humans')) / humanVisits); wmoverview.appendChild(makeElement('dt', {}, "Humans’ metrics")); @@ -2017,20 +2097,20 @@ BotMon.live = { let value = ''; switch(i) { case 0: - title = "Registered users’ page views:"; - value = data.bots.users || kNoData; + title = "Registered users (views / visits):"; + value = (data.views.users || kNoData) + kSeparator + (data.visits.users || kNoData); break; case 1: - title = "“Probably humans” page views:"; - value = data.bots.human || kNoData; + title = "Probably humans (views / visits):"; + value = (data.views.humans || kNoData) + kSeparator + (data.visits.humans || kNoData); break; case 2: title = "Total human page views:"; - value = (data.bots.users + data.bots.human) || kNoData; + value = (data.views.users + data.views.humans) || kNoData; break; case 3: title = "Total human visits:"; - value = humanVisits || kNoData; + value = data.views.total || kNoData; break; case 4: title = "Humans’ bounce rate:"; @@ -2292,6 +2372,10 @@ BotMon.live = { const sumClass = ( !data._seenBy || data._seenBy.indexOf(BM_LOGTYPE.SERVER) < 0 ? 'noServer' : 'hasServer'); + // combine with other networks? + const combineNets = (BMSettings.hasOwnProperty('combineNets') ? 
BMSettings['combineNets'] : true) + && data.hasOwnProperty('_ipRange'); + const li = make('li'); // root list item const details = make('details'); const summary = make('summary', { @@ -2301,17 +2385,17 @@ BotMon.live = { const span1 = make('span'); /* left-hand group */ - if (data._type !== BM_USERTYPE.KNOWN_BOT) { /* No platform/client for bots */ - span1.appendChild(make('span', { /* Platform */ + /*if (data._type !== BM_USERTYPE.KNOWN_BOT) { // No platform/client for bots // disabled because no longer relevant + span1.appendChild(make('span', { // Platform 'class': 'icon_only platform pf_' + (data._platform ? data._platform.id : 'unknown'), 'title': "Platform: " + platformName }, platformName)); - span1.appendChild(make('span', { /* Client */ + span1.appendChild(make('span', { // Client 'class': 'icon_only client client cl_' + (data._client ? data._client.id : 'unknown'), 'title': "Client: " + clientName }, clientName)); - } + }*/ // identifier: if (data._type == BM_USERTYPE.KNOWN_BOT) { /* Bot only */ @@ -2331,16 +2415,22 @@ BotMon.live = { } else { /* others */ - - span1.appendChild(make('span', { // IP-Address - 'class': 'has_icon ipaddr ip' + ipType, - 'title': "IP-Address: " + data.ip - }, data.ip)); + if (combineNets) { - /*span1.appendChild(make('span', { // Internal ID - 'class': 'has_icon session typ_' + data.typ, - 'title': "ID: " + data.id - }, data.id));*/ + const ispName = BotMon.live.data.ipRanges.getOwner( data._ipRange.g ) || data._ipRange.g; + + span1.appendChild(make('span', { // IP-Address + 'class': 'has_icon ipaddr ipnet', + 'title': "IP-Range: " + data._ipRange.g + }, ispName)); + + } else { + + span1.appendChild(make('span', { // IP-Address + 'class': 'has_icon ipaddr ip' + ipType, + 'title': "IP-Address: " + data.ip + }, data.ip)); + } } span1.appendChild(make('span', { /* page views */ @@ -2362,6 +2452,7 @@ BotMon.live = { const span2 = make('span'); /* right-hand group */ // country flag: + if (!combineNets) { // not for combined 
networks if (data.geo && data.geo !== 'ZZ') { span2.appendChild(make('span', { 'class': 'icon_only country ctry_' + data.geo.toLowerCase(), @@ -2369,21 +2460,22 @@ BotMon.live = { 'title': "Country: " + ( data._country || "Unknown") }, ( data._country || "Unknown") )); } + } - span2.appendChild(make('span', { // seen-by icon: - 'class': 'icon_only seenby sb_' + data._seenBy.join(''), - 'title': "Seen by: " + data._seenBy.join('+') - }, data._seenBy.join(', '))); + span2.appendChild(make('span', { // seen-by icon: + 'class': 'icon_only seenby sb_' + data._seenBy.join(''), + 'title': "Seen by: " + data._seenBy.join('+') + }, data._seenBy.join(', '))); - // captcha status: - const cCode = ( data._captcha ? data._captcha._str() : ''); - if (cCode !== '') { - const cTitle = model._makeCaptchaTitle(data._captcha) - span2.appendChild(make('span', { // captcha status - 'class': 'icon_only captcha cap_' + cCode, - 'title': "Captcha-status: " + cTitle - }, cTitle)); - } + // captcha status: + const cCode = ( data._captcha ? data._captcha._str() : ''); + if (cCode !== '') { + const cTitle = model._makeCaptchaTitle(data._captcha) + span2.appendChild(make('span', { // captcha status + 'class': 'icon_only captcha cap_' + cCode, + 'title': "Captcha-status: " + cTitle + }, cTitle)); + } summary.appendChild(span2); @@ -2569,6 +2661,13 @@ BotMon.live = { dl.appendChild(evalDd); } } + + // for debugging only. Disable on production: + dl.appendChild(make('dt', {}, "Debug info:")); + const dbgDd = make('dd', {'class': 'debug'}); + dbgDd.innerHTML = '
' + JSON.stringify(data, null, 4) + '
'; + dl.appendChild(dbgDd); + // return the element to add to the UI: return dl; }, diff --git a/admin.php b/admin.php index 7a3c3a0..e9eda49 100644 --- a/admin.php +++ b/admin.php @@ -43,7 +43,7 @@ class admin_plugin_botmon extends AdminPlugin { $pluginPath = $conf['basedir'] . 'lib/plugins/' . $this->getPluginName(); /* Plugin Headline */ - echo '
+ echo NL . '

Bot Monitoring Plugin

'; } diff --git a/captcha.js b/captcha.js index f95ddcd..9f32475 100644 --- a/captcha.js +++ b/captcha.js @@ -178,7 +178,8 @@ const $BMCaptcha = { const hash = $BMCaptcha.digest.hash(dat.join('|')); // set the cookie: - document.cookie = "DWConfirm=" + hash + ';path=/;'; + document.cookie = "DWConfirm=" + encodeURIComponent(hash) + ';path=/;hostOnly;session;sameSite=strict;' + + (document.location.protocol === 'https:' ? 'secure;' : ''); } catch (err) { console.error(err); diff --git a/conf/default.php b/conf/default.php index 45cf90c..4ae29b2 100644 --- a/conf/default.php +++ b/conf/default.php @@ -5,7 +5,8 @@ * @author Sascha Leib */ -$conf['showday'] = 'yesterday'; +$conf['showday'] = 'today'; +$conf['combineNets'] = true; $conf['geoiplib'] = 'disabled'; $conf['useCaptcha'] = 'disabled'; $conf['captchaSeed'] = 'c53bc5f94929451987efa6c768d8856b'; diff --git a/conf/metadata.php b/conf/metadata.php index 64e2a86..de95acc 100644 --- a/conf/metadata.php +++ b/conf/metadata.php @@ -8,10 +8,11 @@ $meta['showday'] = array('multichoice', '_choices' => array ('yesterday', 'today')); +$meta['combineNets'] = array('onoff'); + $meta['geoiplib'] = array('multichoice', '_choices' => array ('disabled', 'phpgeoip')); -//$meta['useCaptcha'] = array('onoff'); $meta['useCaptcha'] = array('multichoice', '_choices' => array ('disabled', 'loremipsum', 'dada')); $meta['captchaSeed'] = array('string'); diff --git a/config/default-config.json b/config/default-config.json index 8134cc5..e7aab9e 100644 --- a/config/default-config.json +++ b/config/default-config.json @@ -70,7 +70,7 @@ "bot": 30 }, {"func": "blockedByCaptcha", "params": [], - "id": "blockedByCaptcha", "desc": "Visitor was blocked by captcha", + "id": "blockedByCaptcha", "desc": "Visitor did not solve the captcha", "bot": 20 }, {"func": "whitelistedByCaptcha", "params": [], diff --git a/config/known-ipranges.json b/config/known-ipranges.json index 68dcf4f..9d2956b 100644 --- a/config/known-ipranges.json +++ 
b/config/known-ipranges.json @@ -1,23 +1,23 @@ { "groups": [ - {"id": "alibaba", "name": "Alibaba"}, - {"id": "amazon", "name": "Amazon"}, + {"id": "alibaba", "name": "Alibaba Network"}, + {"id": "amazon", "name": "Amazon Data Centres"}, {"id": "bezeq", "name": "Bezeq Int."}, {"id": "brasilnet", "name": "BrasilNet"}, - {"id": "charter", "name": "Charter Inc."}, - {"id": "chinanet", "name": "Chinanet"}, - {"id": "cloudflare", "name": "Cloudflare Inc."}, - {"id": "cnisp", "name": "China ISP"}, + {"id": "charter", "name": "Charter Inc. Range"}, + {"id": "chinanet", "name": "ChinaNet"}, + {"id": "cloudflare", "name": "Cloudflare Network"}, + {"id": "cnisp", "name": "China ISP Range"}, {"id": "cnmob", "name": "China Mobile"}, - {"id": "google", "name": "Google LLC"}, + {"id": "google", "name": "Google LLC Network"}, {"id": "hetzner", "name": "Hetzner US"}, - {"id": "huawei", "name": "Huawei"}, + {"id": "huawei", "name": "Huawei Network"}, {"id": "misc_sa", "name": "Misc. SA ISPs"}, - {"id": "tencent", "name": "Tencent"}, + {"id": "tencent", "name": "Tencent Network"}, {"id": "unicom", "name": "China Unicom"}, {"id": "vnpt", "name": "Vietnam Telecom"}, - {"id": "vdsina", "name": "VDSina NL"}, - {"id": "zenlayer", "name": "Zenlayer"} + {"id": "vdsina", "name": "VDSina Network"}, + {"id": "zenlayer", "name": "Zenlayer Network"} ], "ranges": [ {"from": "1.92.0.0", "to": "1.95.255.254", "m": 14, "g": "huawei"}, @@ -56,6 +56,7 @@ {"from": "111.119.192.0", "to": "111.119.255.254", "m": 18, "g": "huawei"}, {"from": "113.160.0.0", "to": "113.191.255.254", "m": 11, "g": "vnpt"}, {"from": "114.208.0.0", "to": "114.223.255.254", "m": 12, "g": "unicom"}, + {"from": "114.119.0.0", "to": "114.119.255.254", "m": 11, "g": "huawei"}, {"from": "114.224.0.0", "to": "114.255.255.254", "m": 11, "g": "unicom"}, {"from": "119.8.0.0", "to": "119.8.255.254", "m": 16, "g": "huawei"}, {"from": "119.13.0.0", "to": "119.13.255.254", "m": 16, "g": "huawei"}, @@ -64,6 +65,7 @@ {"from": "122.9.0.0", 
"to": "122.9.255.254", "m": 16, "g": "huawei"}, {"from": "123.16.0.0", "to": "123.31.255.254", "m": 12, "g": "vnpt"}, {"from": "124.243.128.0", "to": "124.243.191.254", "m": 18, "g": "huawei"}, + {"from": "136.107.0.0", "to": "136.125.255.254", "m": "+", "g": "google"}, {"from": "138.59.0.0", "to": "138.59.225.254", "m": 16, "g": "misc_sa"}, {"from": "138.121.0.0", "to": "138.121.225.254", "m": 16, "g": "misc_sa"}, {"from": "142.147.128.0", "to": "1142.147.255.254", "m": 17, "g": "w2obj"}, diff --git a/img/addr.png b/img/addr.png index 417982d..6ced149 100644 Binary files a/img/addr.png and b/img/addr.png differ diff --git a/img/captcha.png b/img/captcha.png index 842decf..50f492b 100644 Binary files a/img/captcha.png and b/img/captcha.png differ diff --git a/lang/de/lang.php b/lang/de/lang.php index 0b240a6..1c6bd10 100644 --- a/lang/de/lang.php +++ b/lang/de/lang.php @@ -6,9 +6,9 @@ */ // Captcha dialog locale strings: -$lang['bm_dlgTitle'] = 'Benutzerüberprüfung'; -$lang['bm_dlgSubtitle'] = 'Bitte bestätige, dass du kein Bot bist:'; -$lang['bm_dlgConfirm'] = 'Klicke, um zu bestätigen.'; -$lang['bm_dlgChecking'] = 'Wird überprüft …'; +$lang['bm_dlgTitle'] = 'Benutzerüberprüfung'; +$lang['bm_dlgSubtitle'] = 'Bitte bestätige, dass du kein Bot bist:'; +$lang['bm_dlgConfirm'] = 'Klicke, um zu bestätigen.'; +$lang['bm_dlgChecking'] = 'Wird überprüft …'; $lang['bm_dlgLoading'] = 'Seite wird geladen …'; $lang['bm_dlgError'] = 'Es ist ein Fehler aufgetreten.'; diff --git a/lang/en/settings.php b/lang/en/settings.php index 2cd4700..42a2d20 100644 --- a/lang/en/settings.php +++ b/lang/en/settings.php @@ -9,6 +9,8 @@ $lang['showday'] = 'Which data to show in the “Latest” tab:'; $lang['showday_o_yesterday'] = 'Last full day (yesterday)'; $lang['showday_o_today'] = 'Ongoing logs (today)'; +$lang['combineNets'] = 'Combine visits from known IP-ranges into one entry:'; + $lang['geoiplib'] = 'Add GeoIP Information
(requires PHP module to be installed)'; $lang['geoiplib_o_disabled'] = 'Disabled'; $lang['geoiplib_o_phpgeoip'] = 'Use GeoIP Module';