5 use open ':std', OUT => ':utf8';
# Precompiled, unanchored pattern for a header-like keyword: a run of letters
# followed either by one or more "-word" parts (e.g. "Signed-off-by") or by a
# literal " by" (e.g. "Reviewed by"). /i makes it case-insensitive; /x means
# only the backslash-escaped space is matched literally.
# NOTE(review): presumably used to recognise attribution header names - confirm
# against the use sites, which are not visible here.
20 my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
26 # strip commit separator
28 # skip expensive checks without potential identifier
30 # try to parse as UTF-8
31 eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()) };
32 # if invalid, assume it's cp1252 (Windows-1252, which also covers latin1 text)
33 $_ = decode(cp1252 => $_) if $@;
39 for (reverse split /\n\n/) {
56 given ($opt{simplify} // 'no') {
61 < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
66 when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
70 s{\b (https?)://\S+ }{[$1]}gmsx; # url
71 s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx; # address
72 s{\b [0-9]+ \b}{[num]}gmsx; # number
73 s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx; # hash
76 when (['all', 'any']) {
82 die "Unknown simplify option: '$_'\n";
86 if ($opt{'ignore-case'}) {
90 push @headers, \@header;
93 next BLOCK if not @headers;
95 if ($opt{debug} and $prefix) {
96 say "infix junk in commit $hash";
102 next if $seen->{ $_->[0] }->{ $_->[1] // '' }++;