document count option (and imply unique)
[git-grep-footer.git] / git-grep-footer
index ff279a606bc2ad4cc8545b1e2e14f039b23550ce..b676ab8aef983e241ffb59921849d94075dd889d 100755 (executable)
@@ -9,8 +9,10 @@ use Getopt::Long qw(:config bundling);
 
 GetOptions(\my %opt,
        'debug!',
 
 GetOptions(\my %opt,
        'debug!',
+       'count|c!',
        'simplify|s:s',
        'ignore-case|i!',
        'simplify|s:s',
        'ignore-case|i!',
+       'fuzzy!',
        'min|min-count|unique|u:i',
        'max|max-count|show|n:i',
        'version|V'  => sub { Getopt::Long::VersionMessage() },
        'min|min-count|unique|u:i',
        'max|max-count|show|n:i',
        'version|V'  => sub { Getopt::Long::VersionMessage() },
@@ -21,10 +23,12 @@ GetOptions(\my %opt,
 local $| = 1;
 local $/ = "\0";
 
 local $| = 1;
 local $/ = "\0";
 
-my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
+my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) | cc | reference /ix;
+
+my (%headercount, @headercache);
 
 while (readline) {
 
 while (readline) {
-       s/(.+)\n//m;
+       s/^([0-9a-f]{4,40})\n//m and
        my $hash = $1;
 
        # strip commit separator
        my $hash = $1;
 
        # strip commit separator
@@ -36,12 +40,12 @@ while (readline) {
        # if invalid, assume it's latin1
               $_ = decode(cp1252 => $_) if $@;
 
        # if invalid, assume it's latin1
               $_ = decode(cp1252 => $_) if $@;
 
-       my $prefix = 0;
        my %attr;
 
        BLOCK:
        for (reverse split /\n\n/) {
                my @headers;
        my %attr;
 
        BLOCK:
        for (reverse split /\n\n/) {
                my @headers;
+               my $prefix = 0;
 
                LINE:
                for (split /\n/) {
 
                LINE:
                for (split /\n/) {
@@ -59,6 +63,18 @@ while (readline) {
 
                        push @header, $_ if defined $opt{max};
 
 
                        push @header, $_ if defined $opt{max};
 
+                       if ($opt{fuzzy}) {
+                               for ($header[0]) {
+                                       tr/ _/-/;
+
+                                       state $BY = qr{ (?: -? b[yu] )? \Z }ix;
+                                       s{^ si (?:ge?n|n?g) (?:e?[dt])? -? (?:of+)? $BY}{Signed-off-by}ix;
+                                       s{^ ack (?:ed|de)?  $BY}{Acked-by}ix;
+                                       s{^ review (?:e?d)? $BY}{Reviewed-by}ix;
+                                       s{^ teste[dt]       $BY}{Tested-by}ix;
+                               }
+                       }
+
                        given ($opt{simplify} // 'none') {
                                when (['email', 'authors']) {
                                        $header[1] =~ s{
                        given ($opt{simplify} // 'none') {
                                when (['email', 'authors']) {
                                        $header[1] =~ s{
@@ -69,14 +85,15 @@ while (readline) {
                                        }{<...>}imsx;
                                }
                                when (['var', 'vars', '']) {
                                        }{<...>}imsx;
                                }
                                when (['var', 'vars', '']) {
-                                       when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
+                                       when ($header[0] =~ /[ _-] (?: by | to ) $ | ^cc$/imsx) {
                                                $header[1] = undef;
                                        }
                                        for ($header[1]) {
                                                s{\b (https?)://\S+ }{[$1]}gmsx;  # url
                                                s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
                                                s{\b [0-9]+ \b}{[num]}gmsx;  # number
                                                $header[1] = undef;
                                        }
                                        for ($header[1]) {
                                                s{\b (https?)://\S+ }{[$1]}gmsx;  # url
                                                s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
                                                s{\b [0-9]+ \b}{[num]}gmsx;  # number
-                                               s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
+                                               s{\b [Ig]? [0-9a-f]{  40} \b}{[sha1]}gmsx;  # hash
+                                               s{\b [Ig]? [0-9a-f]{6,40} \b}{[hash]}gmsx;  # abbrev
                                        }
                                }
                                when (['all', 'contents']) {
                                        }
                                }
                                when (['all', 'contents']) {
@@ -101,23 +118,34 @@ while (readline) {
                next BLOCK if not @headers;
 
                if ($opt{debug} and $prefix) {
                next BLOCK if not @headers;
 
                if ($opt{debug} and $prefix) {
-                       say "infix junk in commit $hash";
+                       say sprintf ': invalid lines in %s (%s)', $hash // 'block', $prefix;
                }
 
                for (@headers) {
                }
 
                for (@headers) {
-                       if (defined $opt{min} or $opt{max}) {
-                               state $seen;
-                               my $count = $seen->{ $_->[0] }->{ $_->[1] // '' }++;
-                               next if $count >= ($opt{min} // 0) + ($opt{max} || 1);
-                               next if $count < ($opt{min} // 0);
+                       my $line = $_->[2] // join(': ', @$_);
+                       $line =~ s/^/$hash / if defined $hash;
+
+                       if (defined $opt{min} or $opt{max} or $opt{count}) {
+                               my $counter = \$headercount{ $_->[0] }->{ $_->[1] // '' };
+                               my $excess = $$counter++ - ($opt{min} // 0);
+                               next if $excess >= ($opt{max} || 1);
+                               next if $excess <  0;
+                               if ($opt{count}) {
+                                       push @headercache, [ $line, $excess ? \undef : $counter ];
+                                       next;
+                               }
                        }
                        }
-                       say $_->[2] // join(': ', @$_);
+                       say $line;
                }
 
                last BLOCK;
        }
 }
 
                }
 
                last BLOCK;
        }
 }
 
+for (@headercache) {
+       say ${$_->[1]} // '', "\t", $_->[0];
+}
+
 __END__
 
 =head1 NAME
 __END__
 
 =head1 NAME
@@ -135,6 +163,7 @@ a common convention to list custom metadata such as
 C<Signed-off-by> and C<Acked-by>.
 
 Sections are identified by at least one leading keyword containing a dash
 C<Signed-off-by> and C<Acked-by>.
 
 Sections are identified by at least one leading keyword containing a dash
+(or an exceptionally recognised keyword such as C<Cc> or C<Reference>)
 followed by a colon.
 
 =head1 OPTIONS
 followed by a colon.
 
 =head1 OPTIONS
@@ -182,6 +211,11 @@ The original line is given for each match,
 but simplifications still apply for duplicate determination.
 Additional samples are optionally given up to the given maximum.
 
 but simplifications still apply for duplicate determination.
 Additional samples are optionally given up to the given maximum.
 
+=item -c, --count
+
+Prefixes each (unique) line with its number of occurrences.
+Output is necessarily buffered until all input has been read.
+
 =back
 
 =head1 AUTHOR
 =back
 
 =head1 AUTHOR