5 use List::Util qw( min max sum );
6 use open qw( :std :utf8 );
7 use experimental qw( lexical_subs );
11 use Getopt::Long '2.33', qw( :config gnu_getopt );
14 Pod::Usage::pod2usage(-exitval => 0, -perldocopt => '-oman', @_);
19 'C' => sub { $opt{color} = 0 },
23 $opt{anchor} = /^[0-9]+$/ ? qr/(?:\S*\h+){$_}\K/ : qr/$_/;
24 } or die $@ =~ s/(?: at .+)?$/ for option $_[0]/r;
28 'trim|length|l=s' => sub {
29 my ($optname, $optval) = @_;
30 $optval =~ s/%$// and $opt{trimpct}++;
31 $optval =~ m/^-?[0-9]+$/ or die(
32 "Value \"$optval\" invalid for option $optname",
33 " (number or percentage expected)\n"
41 my ($optname, $optval) = @_;
43 ($opt{hidemin}, $opt{hidemax}) =
44 $optval =~ m/\A (?: ([0-9]+)? - )? ([0-9]+)? \z/x or die(
45 "Value \"$optval\" invalid for option limit",
53 'usage|h' => sub { podexit() },
54 'help' => sub { podexit(-verbose => 2) },
55 ) or exit 64; # EX_USAGE
57 $opt{width} ||= $ENV{COLUMNS} || 80;
58 $opt{color} //= -t *STDOUT; # enable on tty
59 $opt{trim} *= $opt{width} / 100 if $opt{trimpct};
60 $opt{units} = [split //, ' kMGTPEZYyzafpnμm'] if $opt{'human-readable'};
61 $opt{anchor} //= qr/\A/;
62 $opt{'value-length'} = 6 if $opt{units};
64 my (@lines, @values, @order);
66 if (defined $opt{interval}) {
75 require Tie::Array::Sorted;
76 tie @order, 'Tie::Array::Sorted', sub { $_[1] <=> $_[0] };
77 } or warn $@, "Expect slowdown with large datasets!\n";
81 $SIG{INT} = 'DEFAULT'; # reset for subsequent attempts
82 'IGNORE' # continue after assumed eof
85 my $valmatch = qr/$opt{anchor} ( \h* -? [0-9]* \.? [0-9]+ (?: e[+-]?[0-9]+ )? |)/x;
88 s/^\h*// unless $opt{unmodified};
89 push @values, s/$valmatch/\n/ && $1;
90 push @order, $1 if length $1;
91 if (defined $opt{trim} and defined $1) {
92 my $trimpos = abs $opt{trim};
96 elsif (length > $trimpos) {
97 substr($_, $trimpos - 1) = '…';
103 $SIG{INT} = 'DEFAULT';
106 $opt{color} and defined $_[0] or return '';
107 return "\e[$_[0]m" if defined wantarray;
108 $_ = color(@_) . $_ . color(0) if defined;
113 state $nr = $opt{hidemin} ? $opt{hidemin} - 1 : 0;
114 @lines and @lines > $nr or return;
116 @lines > $nr or return unless $opt{hidemin};
118 @order = sort { $b <=> $a } @order unless tied @order;
119 my $maxval = ($opt{hidemax} ? max grep { length } @values[0 .. $opt{hidemax} - 1] : $order[0]) // 0;
120 my $minval = min $order[-1] // (), 0;
121 my $lenval = $opt{'value-length'} // max map { length } @order;
122 my $len = defined $opt{trim} && $opt{trim} <= 0 ? -$opt{trim} + 1 :
123 max map { length $values[$_] && length $lines[$_] }
124 0 .. min $#lines, $opt{hidemax} || (); # left padding
125 my $size = ($maxval - $minval) &&
126 ($opt{width} - $lenval - $len) / ($maxval - $minval); # bar multiplication
129 if ($opt{markers} // 1 and $size > 0) {
130 my sub orderpos { (($order[$_[0]] + $order[$_[0] + .5]) / 2 - $minval) * $size }
131 $barmark[ (sum(@order) / @order - $minval) * $size ] = '='; # average
132 $barmark[ orderpos($#order * .31731) ] = '>';
133 $barmark[ orderpos($#order * .68269) ] = '<';
134 $barmark[ orderpos($#order / 2) ] = '+'; # mean
135 $barmark[ -$minval * $size ] = '|' if $minval < 0; # zero
136 color(36) for @barmark;
138 state $lastmax = $maxval;
139 if ($maxval > $lastmax) {
140 print ' ' x ($lenval + $len);
143 ($lastmax - $minval) * $size + .5,
144 '-' x (($values[$nr - 1] - $minval) * $size);
146 say '+' x (($maxval - $lastmax - $minval) * $size + .5);
152 @lines > $nr or return if $opt{hidemin};
155 my $unit = int(log(abs $_[0] || 1) / log(10) - 3*($_[0] < 1) + 1e-15);
156 my $float = $_[0] !~ /^0*[-0-9]{1,3}$/;
158 $float && ($unit % 3) == ($unit < 0), # tenths
159 $_[0] / 1000 ** int($unit/3), # number
160 $#{$opt{units}} * 1.5 < abs $unit ? "e$unit" : $opt{units}->[$unit/3]
164 while ($nr <= $#lines) {
165 $nr >= $opt{hidemax} and last if defined $opt{hidemax};
166 my $val = $values[$nr];
168 my $color = !$opt{color} ? undef :
169 $val == $order[0] ? 32 : # max
170 $val == $order[-1] ? 31 : # min
172 $val = $opt{units} ? sival($val) : sprintf "%*s", $lenval, $val;
173 color($color) for $val;
175 my $line = $lines[$nr] =~ s/\n/$val/r;
176 printf '%-*s', $len + length($val), $line;
177 print $barmark[$_] // '-' for 1 .. $size && (($values[$nr] || 0) - $minval) * $size + .5;
187 if ($opt{hidemin} or $opt{hidemax}) {
189 $opt{hidemax} ||= @lines;
190 printf '%s of ', sum(@values[$opt{hidemin} - 1 .. $opt{hidemax} - 1]) // 0;
193 my $total = sum @order;
194 printf '%s total', $total;
195 printf ' in %d values', scalar @values;
196 printf ' (%s min, %*.*f avg, %s max)',
197 $order[-1], 0, 2, $total / @order, $order[0];
207 barcat - graph to visualize input values
211 B<barcat> [<options>] [<input>]
215 Visualizes relative sizes of values read from input (file(s) or STDIN).
216 Contents are concatenated similar to I<cat>,
217 but numbers are reformatted and a bar graph is appended to each line.
223 =item -c, --[no-]color
225 Force colored output of values and bar markers.
226 Defaults on if output is a tty,
227 disabled otherwise such as when piped or redirected.
229 =item -f, --field=(<number>|<regexp>)
231 Compare values after a given number of whitespace separators,
232 or matching a regular expression.
234 Unspecified or I<-f0> means values are at the start of each line.
235 With I<-f1> the second word is taken instead.
236 A string can indicate the starting position of a value
237 (such as I<-f:> if preceded by colons),
238 or capture the numbers itself,
239 for example I<-f'(\d+)'> for the first digits anywhere.
241 =item -H, --human-readable
243 Format values using SI unit prefixes,
244 turning long numbers like I<12356789> into I<12.4M>.
245 Also changes an exponent I<1.602176634e-19> to I<160.2z>.
246 Short integers are aligned but kept without decimal point.
248 =item -t, --interval[=<seconds>]
250 Interval time to output partial progress.
252 =item -l, --length=[-]<size>[%]
254 Trim line contents (between number and bars)
255 to a maximum number of characters.
256 The exceeding part is replaced by an abbreviation sign,
257 unless C<--length=0>.
259 Prepend a dash (i.e. make negative) to enforce padding
260 regardless of encountered contents.
262 =item -L, --limit=(<count>|<start>-[<end>])
264 Stop output after a number of lines.
265 All input is still counted and analyzed for statistics,
266 but disregarded for padding and bar size.
270 Statistical positions to indicate on bars.
271 Cannot be customized yet,
272 only disabled by providing an empty argument.
274 Any value enables all marker characters:
281 the sum of all values divided by the number of counted lines.
286 the middle value or average between middle values.
290 Standard deviation left of the mean.
291 Only 16% of all values are lower.
295 Standard deviation right of the mean.
296 The part between B<< <--> >> encompass all I<normal> results,
297 or 68% of all entries.
303 Total statistics after all data.
305 =item -u, --unmodified
307 Do not strip leading whitespace.
308 Keep original value alignment, which may be significant in some programs.
310 =item --value-length=<size>
312 Reserved space for numbers.
314 =item -w, --width=<columns>
316 Override the maximum number of columns to use.
317 Appended graphics will extend to fill up the entire screen.
325 seq 30 | awk '{print sin($1/10)}' | barcat
327 Compare file sizes (with human-readable numbers):
329 du -d0 -b * | barcat -H
331 Memory usage of user processes with long names truncated:
333 ps xo %mem,pid,cmd | barcat -l40
335 Monitor network latency from prefixed results:
337 ping google.com | barcat -f'time=\K' -t
339 Commonly used after counting, for example users on the current server:
341 users | sed 's/ /\n/g' | sort | uniq -c | barcat
343 Letter frequencies in text files:
345 cat /usr/share/games/fortunes/*.u8 |
346 perl -CS -nE 'say for grep length, split /\PL*/, uc' |
347 sort | uniq -c | barcat
349 Number of HTTP requests per day:
351 cat log/access.log | cut -d\ -f4 | cut -d: -f1 | uniq -c | barcat
353 Any kind of database query with counts, preserving returned alignment:
355 echo 'SELECT count(*),schemaname FROM pg_tables GROUP BY 2' |
358 External datasets, like movies per year:
360 curl https://github.com/prust/wikipedia-movie-data/raw/master/movies.json |
361 perl -054 -nlE 'say if s/^"year"://' | uniq -c | barcat
363 But please get I<jq> to process JSON
364 and replace the manual selection by C<< jq '.[].year' >>.
366 Pokémon height comparison:
368 curl https://github.com/Biuni/PokemonGO-Pokedex/raw/master/pokedex.json |
369 jq -r '.pokemon[] | [.height,.num,.name] | join(" ")' | barcat
371 USD/EUR exchange rate from CSV provided by the ECB:
373 curl https://sdw.ecb.europa.eu/export.do \
374 -Gd 'node=SEARCHRESULTS&q=EXR.D.USD.EUR.SP00.A&exportType=csv' |
375 grep '^[12]' | barcat -f',\K' --value-length=7
377 Total population history from the World Bank dataset (XML):
378 External datasets, like total population in XML from the World Bank:
380 curl http://api.worldbank.org/v2/country/1W/indicator/SP.POP.TOTL |
381 xmllint --xpath '//*[local-name()="date" or local-name()="value"]' - |
382 sed -r 's,</wb:value>,\n,g; s,(<[^>]+>)+, ,g' | barcat -f1 -H
384 And of course various Git statistics, such commit count by year:
386 git log --pretty=%ci | cut -b-4 | uniq -c | barcat
388 Or the top 3 most frequent authors with statistics over all:
390 git shortlog -sn | barcat -L3 -s
392 Activity of the last days:
394 ( git log --pretty=%ci --since=30day | cut -b-10
395 seq 0 30 | xargs -i date +%F -d-{}day ) |
396 sort | uniq -c | awk '$1--' | graph
400 Mischa POSLAWSKY <perl@shiar.org>