grep option
[git-grep-footer.git] / git-grep-footer
1 #!/usr/bin/perl
2 use 5.010;
3 use strict;
4 use warnings;
5 use open ':std', OUT => ':utf8';
6 use Encode 'decode';
7 use Data::Dump 'pp';
8 use Getopt::Long qw(:config bundling);
9
# Command line options.  Plain options are stored in %opt under their
# primary (first) name; the last three run a callback instead.
my %opt;
my @optspec = (
        'debug!',                    # report unparsable lines in a header block
        'count|c!',                  # prefix lines by their number of occurrences
        'simplify|s:s',              # optional rule name, '' if given without one
        'ignore-case|i!',            # lowercase keys and values
        'fuzzy!',                    # normalise common misspellings of keys
        'grep|S=s',                  # only keep lines matching this pattern
        'min|min-count|unique|u:i',  # optional threshold, 0 if given without one
        'max|max-count|show|n:i',    # optional limit, 0 if given without one
        'version|V'  => sub { Getopt::Long::VersionMessage() },
        'usage|h'    => sub { Getopt::Long::HelpMessage() },
        'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) },
);

# invalid options terminate the program with exit status 129
GetOptions(\%opt, @optspec)
        or exit 129;
23
local $| = 1;     # flush output after every write
local $/ = "\0";  # input records (commit bodies) are NUL-terminated

# Recognised header keys: a word followed by one or more -suffixes,
# a word followed by " by", or the exceptional keywords cc and reference.
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) | cc | reference /ix;

# %headercount: occurrences seen so far, by (simplified) key and value.
# @headercache: [ line, reference to count ] pairs buffered for --count.
my (%headercount, @headercache);

# Accepted --simplify spellings mapped to their canonical rule.
my %SIMPLIFY_RULE = (
        'email' => 'email', 'authors'  => 'email',
        'var'   => 'var',   'vars'     => 'var',   '' => 'var',
        'all'   => 'all',   'contents' => 'all',
        'none'  => 'none',  'no'       => 'none',  '0' => 'none',
);

# Validate the rule once up front, so a typo is reported even if the
# input happens to contain no header lines at all.
my $simplify_arg = $opt{simplify} // 'none';
my $simplify = $SIMPLIFY_RULE{$simplify_arg}
        // die "Unknown simplify option: '$simplify_arg'\n";

# Compile the user pattern a single time instead of for every header line.
my $grep_re = defined $opt{grep} ? qr/$opt{grep}/i : undef;

# Optional "by" suffix (or its typo "bu") at the end of a fuzzy key.
my $BY = qr{ (?: -? b[yu] )? \Z }ix;

RECORD:
while (defined(my $record = <>)) {
        # Strip the first line consisting solely of a commit hash, if any.
        # Declared unconditionally: a "my" guarded by a condition has
        # undefined behaviour.
        # NOTE(review): /m also accepts a hash line further down the body,
        # presumably to skip a newline preceding the hash - confirm.
        my $hash = $record =~ s/^([0-9a-f]{4,40})\n//m ? $1 : undef;

        # strip commit separator
        chomp $record;
        # skip expensive checks without potential identifier
        next RECORD if index($record, ':') < 0;

        # try to parse as UTF-8; if invalid, assume it's latin1
        my $text;
        eval {
                $text = decode(utf8 => $record, Encode::FB_CROAK());
                1;
        } or $text = decode(cp1252 => $record);

        # Search paragraphs from the end of the body backwards;
        # only the last one containing any header line is reported.
        BLOCK:
        for my $block (reverse split /\n\n/, $text) {
                my @headers;     # [ key, value or '', output line ]
                my $prefix = 0;  # number of non-blank lines which are not headers

                LINE:
                for my $line (split /\n/, $block) {
                        next LINE if $line !~ /\S/;

                        my ($key, $value) = $line =~ m{
                                ^
                                (?<key> $HEADERMATCH)
                                : \s*
                                (?<val> \S .+)
                                $
                        }imx;
                        if (not defined $key) {
                                $prefix++;
                                next LINE;
                        }

                        if ($opt{fuzzy}) {
                                # normalise separators and common misspellings
                                for ($key) {
                                        tr/ _/-/;

                                        s{^ si (?:ge?n|n?g) (?:e?[dt])? -? (?:of+)? $BY}{Signed-off-by}ix;
                                        s{^ ack (?:ed|de)?  $BY}{Acked-by}ix;
                                        s{^ review (?:e?d)? $BY}{Reviewed-by}ix;
                                        s{^ teste[dt]       $BY}{Tested-by}ix;
                                }
                        }

                        # the pattern is applied to the original, unmodified line
                        next LINE if defined $grep_re and $line !~ $grep_re;

                        if ($simplify eq 'email') {
                                # collapse "Name <address>" values entirely
                                $value =~ s{
                                        \A
                                        (?: [^:;]+ )?
                                        < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
                                        \Z
                                }{<...>}imsx;
                        }
                        elsif ($simplify eq 'var') {
                                if ($key =~ /[ _-] (?: by | to ) $ | ^cc$/imsx) {
                                        # NOTE(review): the manual mentions an exception for
                                        # values containing a colon; none is made here - confirm.
                                        $value = undef;
                                }
                                else {
                                        for ($value) {
                                                s{\b (https?)://\S+ }{[$1]}gmsx;  # url
                                                s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
                                                s{\b [0-9]+ \b}{[num]}gmsx;  # number
                                                # no blanks inside the braces: before perl 5.34
                                                # "{  40}" is not parsed as a quantifier
                                                s{\b [Ig]? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
                                                s{\b [Ig]? [0-9a-f]{6,40} \b}{[hash]}gmsx;  # abbrev
                                        }
                                }
                        }
                        elsif ($simplify eq 'all') {
                                $value = undef;
                        }

                        if ($opt{'ignore-case'}) {
                                $key   = lc $key;
                                $value = lc $value if defined $value;
                        }

                        # With --show the original line is printed, whereas the
                        # simplified key and value still determine duplicates.
                        my $display = defined $opt{max} ? $line
                                : join(': ', grep { defined } $key, $value);

                        push @headers, [ $key, $value // '', $display ];
                }

                next BLOCK if not @headers;

                if ($opt{debug} and $prefix) {
                        say sprintf ': invalid lines in %s (%s)', $hash // 'block', $prefix;
                }

                HEADER:
                for my $header (@headers) {
                        my ($key, $value, $display) = @{$header};
                        $display = "$hash $display" if defined $hash;

                        if (defined $opt{min} or $opt{max} or $opt{count}) {
                                my $counter = \$headercount{$key}{$value};
                                # occurrences beyond the --unique threshold, before this one
                                my $excess = $$counter++ - ($opt{min} // 0);
                                next HEADER if $excess < 0;
                                next HEADER if $excess >= ($opt{max} || 1);
                                if ($opt{count}) {
                                        # only the first sample carries the (final) count
                                        push @headercache, [ $display, $excess ? \undef : $counter ];
                                        next HEADER;
                                }
                        }
                        say $display;
                }

                last BLOCK;
        }
}
149
# Print the lines buffered for --count: the number of occurrences
# (empty for additional samples), a tab, and the line itself.
for my $entry (@headercache) {
        my ($line, $count_ref) = @{$entry};
        say $$count_ref // '', "\t", $line;
}
153
154 __END__
155
156 =head1 NAME
157
158 git-grep-footer - Find custom header lines in commit messages
159
160 =head1 SYNOPSIS
161
162 F<git> log --pretty=%b%x00 | F<git-grep-footer> [OPTIONS]
163
164 =head1 DESCRIPTION
165
166 Extracts header sections near the end of a commit body,
167 a common convention to list custom metadata such as
168 C<Signed-off-by> and C<Acked-by>.
169
170 Sections are identified by at least one line starting with a keyword
171 containing a dash (or an exceptionally recognised one such as C<Cc>),
172 followed by a colon.
173
174 =head1 OPTIONS
175
176 =over
177
178 =item -i, --ignore-case
179
180 Lowercases everything.
181
182 =item -s, --simplify[=<rule>]
183
184 Modifies values to hide specific details.
185 Several different rules are supported:
186
187 =over
188
189 =item I<var> (default)
190
191 Replaces highly variable contents such as numbers, hashes, and addresses,
192 leaving only exceptional annotations as distinct text.
193 Attributes ending in I<-to> or I<-by> are assumed variable author names
194 and omitted entirely,
195 unless they contain a colon indicating possible attribute exceptions.
196
197 =item I<email>
198
199 Filters out author lines following the git signoff convention,
200 i.e. an <email address> optionally preceded by a name.
201
202 =item I<all>
203
204 Values will be hidden entirely, so only attribute names remain.
205
206 =back
207
208 =item -S, --grep=<pattern>
209
210 Only include lines matching the specified regular expression.
211 Case insensitivity can be disabled by prepending C<(?-i)>.
212
213 =item -u, --unique[=<threshold>]
214
215 Each match is only shown once,
216 optionally after it has already occurred a given amount of times.
217
218 =item -n, --show[=<limit>]
219
220 The original line is given for each match,
221 but simplifications still apply for duplicate determination.
222 Additional samples are optionally given up to the given maximum.
223
224 =item -c, --count
225
226 Prefixes (unique) lines by the number of occurrences.
227 Causes output to be buffered until all input has been read (obviously).
228
229 =back
230
231 =head1 AUTHOR
232
233 Mischa POSLAWSKY <perl@shiar.org>
234
235 =head1 LICENSE
236
237 Copyright. All rights reserved.
238