sample options (original line(s), unique threshold)
[git-grep-footer.git] / git-grep-footer
index 6952508cdb94a4ca84c97857fe60fbbc8c481b5c..906ae929f56f1cb439af2dcb0cc9c7dbbace0e4e 100755 (executable)
@@ -1,16 +1,22 @@
-#!/usr/bin/perl -0 -CO
+#!/usr/bin/perl
 use 5.010;
 use strict;
 use warnings;
+use open ':std', OUT => ':utf8';
 use Encode 'decode';
 use Data::Dump 'pp';
 use Getopt::Long;
 
 GetOptions(\my %opt,
        'debug!',
+       'simplify|s:s',
+       'ignore-case|i!',
+       'min|min-count|unique|u:i',
+       'max|max-count|show|n:i',
 ) or die;
 
 local $| = 1;
+local $/ = "\0";
 
 my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
 
@@ -48,6 +54,44 @@ while (readline) {
                                next LINE;
                        };
 
+                       push @header, $_ if defined $opt{max};
+
+                       given ($opt{simplify} // 'no') {
+                               when ('strict') {
+                                       $header[1] =~ s{
+                                               \A
+                                               (?: [^:]+ )?
+                                               < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
+                                               \Z
+                                       }{<...>}imsx;
+                               }
+                               when (['text', '']) {
+                                       when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
+                                               $header[1] = undef;
+                                       }
+                                       for ($header[1]) {
+                                               s{\b (https?)://\S+ }{[$1]}gmsx;  # url
+                                               s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
+                                               s{\b [0-9]+ \b}{[num]}gmsx;  # number
+                                               s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
+                                       }
+                               }
+                               when (['all', 'any']) {
+                                       $header[1] = undef;
+                               }
+                               when ('no') {
+                               }
+                               default {
+                                       die "Unknown simplify option: '$_'\n";
+                               }
+                       }
+
+                       if ($opt{'ignore-case'}) {
+                               $_ = lc for $header[0], $header[1] // ();
+                       }
+
+                       pop @header if not defined $header[-1];
+
                        push @headers, \@header;
                }
 
@@ -58,7 +102,13 @@ while (readline) {
                }
 
                for (@headers) {
-                       say join ': ', @$_;
+                       if (defined $opt{min} or $opt{max}) {
+                               state $seen;
+                               my $count = $seen->{ $_->[0] }->{ $_->[1] // '' }++;
+                               next if $count >= ($opt{min} // 0) + ($opt{max} || 1);
+                               next if $count < ($opt{min} // 0);
+                       }
+                       say $_->[2] // join(': ', @$_);
                }
 
                last BLOCK;