explicit implementation of shebang run parameters
[git-grep-footer.git] / git-grep-footer
1 #!/usr/bin/perl
2 use 5.010;
3 use strict;
4 use warnings;
5 use open ':std', OUT => ':utf8';
6 use Encode 'decode';
7 use Data::Dump 'pp';
8 use Getopt::Long;
9
# Command-line switches (all optional):
#   --debug               report commits in which non-header lines were seen
#                         while scanning for the header paragraph
#   --simplify, -s [MODE] reduce header values; MODE is one of strict, text
#                         (also used when MODE is omitted), all/any, or no
#   --unique, -u          print each distinct header only once
#   --ignore-case, -i     lowercase keys and values before printing
GetOptions(\my %opt,
        'debug!',
        'simplify|s:s',
        'unique|u!',
        'ignore-case|i!',
) or die "Usage: $0 "
        . '[--debug] [--simplify[=strict|text|all|any|no]] [--unique] [--ignore-case]'
        . "\n";

# Resolve the simplify mode once, before reading any input, so that a typo is
# reported immediately instead of only when the first header line shows up.
# The table maps every accepted spelling to its canonical mode.
my %SIMPLIFY_ALIAS = (
        'no'     => 'no',
        'strict' => 'strict',
        'text'   => 'text',
        ''       => 'text',   # bare --simplify without a value
        'all'    => 'all',
        'any'    => 'all',
);
my $simplify = $opt{simplify} // 'no';
exists $SIMPLIFY_ALIAS{$simplify}
        or die "Unknown simplify option: '$simplify'\n";
$simplify = $SIMPLIFY_ALIAS{$simplify};

# Flush output after every write; input records are NUL-terminated.
local $| = 1;
local $/ = "\0";

# A header key: a word followed by one or more "-word" parts, or "<word> by".
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;

# Headers already printed (key => value => count); consulted only with --unique.
my %seen;

COMMIT:
while (my $commit = <>) {
        # The first non-empty line is taken as the commit identifier and removed
        # from the record.  Only read the capture when the substitution actually
        # matched; $1 is not trustworthy after a failed match.
        my $hash = $commit =~ s/(.+)\n//m ? $1 : undef;

        # strip commit separator
        chomp $commit;
        # skip expensive checks without potential identifier
        $commit =~ m/:/ or next COMMIT;

        # Try to parse as UTF-8.  LEAVE_SRC guarantees the raw bytes are still
        # intact for the fallback, and the eval result (not $@) decides success.
        # If the bytes are not valid UTF-8, decode them as cp1252 instead.
        my $message = eval {
                decode(utf8 => $commit, Encode::FB_CROAK() | Encode::LEAVE_SRC())
        } // decode(cp1252 => $commit);

        # number of non-header lines met so far while scanning this commit
        my $prefix = 0;

        # Walk the paragraphs from the end of the message; the first one that
        # contains at least one header line is printed and ends the search.
        BLOCK:
        for my $block (reverse split /\n\n/, $message) {
                my @headers;

                LINE:
                for my $line (split /\n/, $block) {
                        next LINE if $line !~ /\S/;

                        # yields (key, value) on success, an empty list otherwise
                        my @header = $line =~ m{
                                ^
                                (?<key> $HEADERMATCH)
                                : \s*
                                (?<val> \S .+)
                                $
                        }imx;
                        if (not @header) {
                                $prefix++;
                                next LINE;
                        }

                        if ($simplify eq 'strict') {
                                # collapse a value that is just "[name] <address>"
                                $header[1] =~ s{
                                        \A
                                        (?: [^:]+ )?
                                        < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
                                        \Z
                                }{<...>}imsx;
                        }
                        elsif ($simplify eq 'text') {
                                if ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
                                        # "...-by" / "...-to" headers: keep the key only
                                        pop @header;
                                }
                                else {
                                        for ($header[1]) {
                                                s{\b (https?)://\S+ }{[$1]}gmsx;  # url
                                                s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
                                                s{\b [0-9]+ \b}{[num]}gmsx;  # number
                                                s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
                                        }
                                }
                        }
                        elsif ($simplify eq 'all') {
                                # keep the key only
                                pop @header;
                        }
                        # mode 'no': leave the header untouched

                        if ($opt{'ignore-case'}) {
                                $_ = lc for @header;
                        }

                        push @headers, \@header;
                }

                next BLOCK if not @headers;

                if ($opt{debug} and $prefix) {
                        say 'infix junk in commit ' . ($hash // '(unknown)');
                }

                HEADER:
                for my $header (@headers) {
                        if ($opt{unique}) {
                                # a header reduced to its key is tracked under an empty value
                                next HEADER if $seen{ $header->[0] }{ $header->[1] // '' }++;
                        }
                        say join ': ', @$header;
                }

                last BLOCK;
        }
}