X-Git-Url: http://git.shiar.net/sheet.git/blobdiff_plain/4a28039ff22d6402f923488c0681f640b6fa14a6..b6a3765d4e727ece64b19f82d3ccc1dd573a912a:/tools/mkusage-wikimedia diff --git a/tools/mkusage-wikimedia b/tools/mkusage-wikimedia index a50d899..7a59ba5 100755 --- a/tools/mkusage-wikimedia +++ b/tools/mkusage-wikimedia @@ -4,7 +4,7 @@ use warnings; use Data::Dump 'pp'; -our $VERSION = '1.00'; +our $VERSION = '1.01'; my %BROWSERID = qw( IE ie @@ -25,6 +25,8 @@ my %BROWSERID = qw( Opera-Mini op_mini BlackBerry-WebKit bb UC-Browser and_uc + Samsung-Internet samsung + Other 0 ); my %count = ( @@ -32,15 +34,21 @@ my %count = ( -site => 'https://analytics.wikimedia.org/', ); -my $recent = qr/^2017-/; +my $recent = qr/^2019-/; (readline =~ y/\t//) == 3 or die "unexpected amount of columns in header\n"; while (my $row = readline) { + $row =~ s/\r?\n\z//; my ($date, $name, $version, $pct) = split /\t/, $row; $date =~ $recent or next; $name =~ y/ /-/; - my $browser = $BROWSERID{$name} or next; + my $browser = $BROWSERID{$name}; + if (not $browser) { + warn "unknown browser: $name v$version ($pct)\n" + unless defined $browser or $pct < .005; + next; + } $version =~ s/\A-\z/0/; $count{$browser}{$version} += $pct; $count{-total} += $pct; @@ -59,6 +67,5 @@ __END__ =head1 USAGE - curl https://analytics.wikimedia.org/datasets/periodic/reports/metrics/browser/all_sites_by_browser_family_and_major_percent.tsv | - ./mkusage-wikimedia >browser-usage.inc.pl + ./mkusage-wikimedia wikipedia-analytics.tsv >browser-usage.inc.pl