-#!/usr/bin/perl -0 -CO
+#!/usr/bin/perl
use 5.010;
use strict;
use warnings;
+use open ':std', OUT => ':utf8';
use Encode 'decode';
use Data::Dump 'pp';
-use Getopt::Long;
+use Getopt::Long qw(:config bundling);
GetOptions(\my %opt,
'debug!',
-) or die;
+ 'count|c!',  # negatable flag; makes the output loop defer lines and prefix them with a tally
+ 'simplify|s:s',  # optional string value (rule name); '' when given without a value
+ 'ignore-case|i!',  # negatable flag; header name and value are lowercased
+ 'min|min-count|unique|u:i',  # optional integer, 0 when bare; stored as $opt{min}
+ 'max|max-count|show|n:i',  # optional integer, 0 when bare; stored as $opt{max}
+ 'version|V' => sub { Getopt::Long::VersionMessage() },  # print version and exit
+ 'usage|h' => sub { Getopt::Long::HelpMessage() },  # brief usage taken from the POD
+ 'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) },  # full manual page
+) or exit 129;  # invalid options: exit status 129 (presumably mirrors git's usage-error code -- confirm)
local $| = 1;
+local $/ = "\0";
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
+my (%headercount, @headercache);
+
while (readline) {
s/(.+)\n//m;
my $hash = $1;
next LINE;
};
+ push @header, $_ if defined $opt{max};  # with --show: append current $_ (presumably the raw line; assumes @header already holds name and value -- confirm); the output loop prefers element [2] as display text
+
+ given ($opt{simplify} // 'none') {  # rule name; option absent => 'none', bare -s => ''
+ when (['email', 'authors']) {  # collapse a value that is entirely "[name] <address>" into <...>
+ $header[1] =~ s{
+ \A
+ (?: [^:;]+ )?
+ < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
+ \Z
+ }{<...>}imsx;
+ }
+ when (['var', 'vars', '']) {  # default rule, also selected by a bare --simplify
+ when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {  # names ending in by/to: drop the value; completing this when also leaves the enclosing given, skipping the substitutions below
+ $header[1] = undef;
+ }
+ for ($header[1]) {  # alias the value as $_ so the substitutions apply to it in place
+ s{\b (https?)://\S+ }{[$1]}gmsx; # url
+ s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx; # address
+ s{\b [0-9]+ \b}{[num]}gmsx; # number
+ s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx; # hash
+ }
+ }
+ when (['all', 'contents']) {  # hide every value; only the attribute name remains
+ $header[1] = undef;
+ }
+ when (['none', 'no', '0']) {  # explicit no-op: leave the value untouched
+ }
+ default {  # any other rule name is a fatal usage error
+ die "Unknown simplify option: '$_'\n";
+ }
+ }
+
+ if ($opt{'ignore-case'}) {
+ $_ = lc for $header[0], $header[1] // ();  # lowercase the name, and the value only if it is still defined
+ }
+
+ pop @header if not defined $header[-1];  # drop a trailing undef element (e.g. a value removed above)
+
+
push @headers, \@header;
}
}
for (@headers) {
- say join ': ', @$_;
+ my $line = $_->[2] // join(': ', @$_);  # display text: third element if present, else the elements joined by ': '
+ if (defined $opt{min} or $opt{max}) {  # count duplicates only for --unique or a nonzero --show limit
+ my $counter = \$headercount{ $_->[0] }->{ $_->[1] // '' };  # reference to the tally for this name/value pair ('' when there is no value)
+ my $excess = $$counter++ - ($opt{min} // 0);  # earlier occurrences minus the threshold; post-increment records this one
+ next if $excess >= ($opt{max} || 1);  # already shown the allowed number of samples (1 if no limit given)
+ next if $excess < 0;  # threshold not reached yet
+ if ($opt{count}) {
+ push @headercache, [ $line, $excess ? \undef : $counter ];  # defer output; only the first sample keeps the live counter, later ones get a ref to undef
+ next;
+ }
+ }
+ say $line;
}
last BLOCK;
}
}
+
+for (@headercache) {  # deferred --count output: [ line, counter-ref ] pairs queued by the output loop
+ say ${$_->[1]} // '', "\t", $_->[0];  # tally as it stands now (blank when the ref points to undef), a tab, then the line
+}
+
+__END__
+
+=head1 NAME
+
+git-grep-footer - Find custom header lines in commit messages
+
+=head1 SYNOPSIS
+
+F<git> log --pretty=%b%x00 | F<git-grep-footer> [OPTIONS]
+
+=head1 DESCRIPTION
+
+Extracts header sections found near the end of a commit body,
+a common convention to list custom metadata such as
+C<Signed-off-by> and C<Acked-by>.
+
+Sections are identified by at least one leading keyword
+which contains a dash and is followed by a colon.
+
+=head1 OPTIONS
+
+=over
+
+=item -i, --ignore-case
+
+Lowercases everything.
+
+=item -s, --simplify[=<rule>]
+
+Modifies values to hide specific details.
+Several different rules are supported:
+
+=over
+
+=item I<var> (default)
+
+Replaces highly variable contents such as numbers, hashes, and addresses,
+leaving only exceptional annotations as distinct text.
+Attributes ending in I<-to> or I<-by> are assumed variable author names
+and omitted entirely,
+unless they contain a colon indicating possible attribute exceptions.
+
+=item I<email>
+
+Filters out author lines following the git signoff convention,
+i.e. an <email address> optionally preceded by a name.
+
+=item I<all>
+
+Values will be hidden entirely, so only attribute names remain.
+
+=back
+
+=item -u, --unique[=<threshold>]
+
+Each match is only shown once,
+optionally after it has already occurred a given number of times.
+
+=item -n, --show[=<limit>]
+
+The original line is given for each match,
+but simplifications still apply for duplicate determination.
+Additional samples are optionally shown, up to the specified maximum.
+
+=back
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY <perl@shiar.org>
+
+=head1 LICENSE
+
+Copyright. All rights reserved.
+