X-Git-Url: http://git.shiar.net/git-grep-footer.git/blobdiff_plain/bd2d128ff40122f786010a9dd5a184cd2bc2de89..HEAD:/git-grep-footer diff --git a/git-grep-footer b/git-grep-footer index 8ac3a03..97fbca7 100755 --- a/git-grep-footer +++ b/git-grep-footer @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use 5.010; use strict; use warnings; @@ -7,61 +7,86 @@ use Encode 'decode'; use Data::Dump 'pp'; use Getopt::Long qw(:config bundling); +our $VERSION = '1.00'; + GetOptions(\my %opt, 'debug!', + '', # stdin 'count|c!', 'simplify|s:s', 'ignore-case|i!', + 'fuzzy!', + 'grep|S=s', 'min|min-count|unique|u:i', 'max|max-count|show|n:i', + 'hash|H!', 'version|V' => sub { Getopt::Long::VersionMessage() }, 'usage|h' => sub { Getopt::Long::HelpMessage() }, 'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) }, ) or exit 129; +my $inputstream = $opt{''} ? \*ARGV : eval { + require Git; + Git::command_output_pipe('log', '-z', '--pretty=format:%h%n%b', @ARGV); +} || die "Automatic git log failed: $@"; + local $| = 1; local $/ = "\0"; -my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix; +my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) | cc | reference /imsx; my (%headercount, @headercache); -while (readline) { - s/(.+)\n//m; - my $hash = $1; +while (readline $inputstream) { + s/^ ([0-9a-f]{4,40}) \n//msx; + my $hash = $opt{hash} ? $1 : undef; # strip commit seperator chomp; # skip expensive checks without potential identifier m/:/ or next; # try to parse as UTF-8 - eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()) }; + eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()); return 1 } # if invalid, assume it's latin1 - $_ = decode(cp1252 => $_) if $@; - - my $prefix = 0; - my %attr; + or $_ = decode(cp1252 => $_); BLOCK: for (reverse split /\n\n/) { my @headers; + my $prefix = 0; LINE: for (split /\n/) { - next if not /\S/; + next if not m/\S/; my @header = m{ ^ (? $HEADERMATCH) : \s* - (? \S .+) + (? \S [^\n]+) $ - }imx or do { + }imsx or do { $prefix++; next LINE; }; push @header, $_ if defined $opt{max}; + if ($opt{fuzzy}) { + for ($header[0]) { + tr/ _/-/; + + state $BY = qr{ (?: -? b[yu] )? \Z }imsx; + s{\A si (?:ge?n|n?g) (?:e?[dt])? -? (?:of+)? $BY}{Signed-off-by}imsx; + s{\A ack (?:ed|de)? $BY}{Acked-by}imsx; + s{\A review (?:e?d)? $BY}{Reviewed-by}imsx; + s{\A teste[dt] $BY}{Tested-by}imsx; + } + } + + if (defined $opt{grep}) { + $_ ~~ qr/$opt{grep}/im or next LINE; + } + given ($opt{simplify} // 'none') { when (['email', 'authors']) { $header[1] =~ s{ @@ -72,14 +97,15 @@ while (readline) { }{<...>}imsx; } when (['var', 'vars', '']) { - when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) { + when ($header[0] =~ m/[ _-] (?: by | to ) $ | ^cc$/imsx) { $header[1] = undef; } for ($header[1]) { s{\b (https?)://\S+ }{[$1]}gmsx; # url s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx; # address s{\b [0-9]+ \b}{[num]}gmsx; # number - s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx; # hash + s{\b [Ig]? [0-9a-f]{ 40} \b}{[sha1]}gmsx; # hash + s{\b [Ig]? [0-9a-f]{6,40} \b}{[hash]}gmsx; # abbrev } } when (['all', 'contents']) { @@ -104,14 +130,16 @@ while (readline) { next BLOCK if not @headers; if ($opt{debug} and $prefix) { - say "infix junk in commit $hash"; + say sprintf ': invalid lines in %s (%s)', $hash // 'block', $prefix; } for (@headers) { my $line = $_->[2] // join(': ', @$_); - if (defined $opt{min} or $opt{max}) { + $line =~ s/\A/$hash /msx if defined $hash; + + if (defined $opt{min} or $opt{max} or $opt{count}) { my $counter = \$headercount{ $_->[0] }->{ $_->[1] // '' }; - my $excess = $$counter++ - ($opt{min} // 0); + my $excess = ${$counter}++ - ($opt{min} // 0); next if $excess >= ($opt{max} || 1); next if $excess < 0; if ($opt{count}) { @@ -138,7 +166,9 @@ git-grep-footer - Find custom header lines in commit messages =head1 SYNOPSIS -F log --pretty=%b%x00 | F [OPTIONS] +F [OPTIONS] [-- ] + +F log -z --pretty=format:%b | F [OPTIONS] - =head1 DESCRIPTION @@ -147,6 +177,7 @@ a common convention to list custom metadata such as C and C. Sections are identified by at least one leading keyword containing a dash +(or exceptionally recognised) followed by a colon. =head1 OPTIONS @@ -183,6 +214,11 @@ Values will be hidden entirely, so only attribute names remain. =back +=item --grep= + +Only include lines matching the specified regular expression. +Case insensitivity can be disabled by prepending C<(?-i)>. + =item -u, --unique[=] Each match is only shown once, @@ -194,6 +230,42 @@ The original line is given for each match, but simplifications still apply for duplicate determination. Additional samples are optionally given upto the given maximum. +=item -c, --count + +Prefixes (unique) lines by the number of occurrences. +Causes output to be buffered until all input has been read (obviously). + +=item -H, --hash + +Prefixes the SHA1 hash of the (or a) matching commit. + +=back + +=head1 EXAMPLES + +=over + +=item git-grep-footer --grep=^ack v2.6.32..v2.6.33 + +Search for I lines for version I. +Append C<-uin> to skip reoccurrences. + +=item git-grep-footer -u --grep=junio + +Show distinct lines mentioning a specific author. + +=item git-grep-footer -c --simplify --grep=^si + +Compare various capitalisations and (mis)spellings of signoffs. + +=item git-grep-footer -c --simplify=all -i | sort -n -r | head -n10 + +List the ten most frequently used attribute names. + +=item git-grep-footer -n2 -i -s --hash -- --reverse + +The earliest two usages of each distinct identifier. + =back =head1 AUTHOR @@ -202,5 +274,7 @@ Mischa POSLAWSKY =head1 LICENSE -Copyright. All rights reserved. +This software is free software; +you can redistribute and/or modify it under the terms of the GNU GPL +version 2 or later.