#!/usr/bin/perl
# git-grep-footer - find custom header lines ("footers") in commit messages.
#
# Reads NUL-separated commit bodies from the files named on the command line
# (or standard input), looks for the last paragraph that contains header-like
# lines such as "Signed-off-by: ...", and prints those lines, optionally
# simplified, de-duplicated and counted.  See the POD after __END__ for usage.
use 5.010;
use strict;
use warnings;
use open ':std', OUT => ':utf8';
use Encode 'decode';
use Data::Dump 'pp';    # not referenced below; kept from the original import list
use Getopt::Long qw(:config bundling);

GetOptions(\my %opt,
    'debug!',
    'count|c!',
    'simplify|s:s',
    'ignore-case|i!',
    'fuzzy!',
    'grep|S=s',
    'min|min-count|unique|u:i',
    'max|max-count|show|n:i',
    'version|V'  => sub { Getopt::Long::VersionMessage() },
    'usage|h'    => sub { Getopt::Long::HelpMessage() },
    'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) },
) or exit 129;

local $| = 1;       # unbuffered output
local $/ = "\0";    # input records are NUL-terminated commit bodies

# Attribute names: a word followed by one or more -parts or by " by",
# plus the dash-less exceptions "cc" and "reference".
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) | cc | reference /ix;

# Optional (possibly misspelled) "-by" suffix at the end of a name.
my $BY = qr{ (?: -? b[yu] )? \Z }ix;

# Accepted --simplify values mapped onto the rule they select.
# A bare -s yields the empty string, which selects 'var'.
my %SIMPLIFY_MODE = (
    'email'    => 'email',
    'authors'  => 'email',
    'var'      => 'var',
    'vars'     => 'var',
    ''         => 'var',
    'all'      => 'all',
    'contents' => 'all',
    'none'     => 'none',
    'no'       => 'none',
    '0'        => 'none',
);

# Validate options up front so mistakes are reported before any input is read.
my $simplify = $opt{simplify} // 'none';
my $mode     = $SIMPLIFY_MODE{$simplify};
defined $mode or die "Unknown simplify option: '$simplify'\n";

# Case-insensitive by default; a user pattern can start with (?-i) to undo that.
my $grep_re = defined $opt{grep} ? qr/$opt{grep}/i : undef;

# Uniqueness bookkeeping is only needed for --unique, a non-zero --show,
# or --count.
my $dedupe = defined $opt{min} || $opt{max} || $opt{count};

# canonical_name($name)
# Returns $name with spaces and underscores turned into dashes and common
# misspellings of the well-known attributes replaced by their usual form.
sub canonical_name {
    my ($name) = @_;

    for ($name) {
        tr/ _/-/;
        s{^ si (?:ge?n|n?g) (?:e?[dt])? -? (?:of+)? $BY}{Signed-off-by}ix;
        s{^ ack (?:ed|de)? $BY}{Acked-by}ix;
        s{^ review (?:e?d)? $BY}{Reviewed-by}ix;
        s{^ teste[dt] $BY}{Tested-by}ix;
    }

    return $name;
}

# simplify_value($mode, $name, $value)
# Applies the simplification rule $mode ('email', 'var', 'all' or 'none') to
# the attribute value.  Returns the rewritten value, or undef when the value
# is to be hidden entirely.  Call in scalar context.
sub simplify_value {
    my ($mode, $name, $value) = @_;

    if ($mode eq 'email') {
        # "Name <user@host>" (or "user at host") collapses to a placeholder.
        $value =~ s{
            \A
            (?: [^:;]+ )?
            < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
            \Z
        }{<...>}imsx;
        return $value;
    }

    if ($mode eq 'all') {
        return;
    }

    if ($mode eq 'var') {
        # Names ending in by/to, and cc, have their value dropped altogether.
        return if $name =~ /[ _-] (?: by | to ) $ | ^cc$/imsx;

        for ($value) {
            s{\b (https?)://\S+ }{[$1]}gmsx;  # url
            s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
            s{\b [0-9]+ \b}{[num]}gmsx;  # number
            # No blanks inside the braces: before perl 5.34 "{ 40}" under /x
            # is not a quantifier, so the full-hash rule could never match.
            s{\b [Ig]? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
            s{\b [Ig]? [0-9a-f]{6,40} \b}{[hash]}gmsx;  # abbrev
        }
        return $value;
    }

    return $value;    # 'none': leave untouched
}

my (%headercount, @headercache);

RECORD:
while (my $record = <>) {
    # A line holding only 4-40 hex digits is taken to be the commit hash.
    # The variable is declared unconditionally: with "... and my $hash = $1"
    # a record without such a line could see the previous record's hash.
    my $hash = $record =~ s/^([0-9a-f]{4,40})\n//m ? $1 : undef;

    # strip commit separator
    chomp $record;
    # skip expensive checks without potential identifier
    next RECORD if $record !~ /:/;

    # try to parse as UTF-8, leaving the raw bytes intact for the fallback
    my $body = eval {
        decode(utf8 => $record, Encode::FB_CROAK() | Encode::LEAVE_SRC());
    };
    # if invalid, assume it's cp1252
    $body = decode(cp1252 => $record) if not defined $body;

    # Paragraphs are examined last to first; the first one that yields any
    # headers is reported and the rest of the commit is ignored.
    BLOCK:
    for my $block (reverse split /\n\n/, $body) {
        my @headers;
        my $invalid = 0;    # non-blank lines that are not headers

        LINE:
        for my $line (split /\n/, $block) {
            next LINE if $line !~ /\S/;

            # @header is (name, value) and, with --show, the original line.
            my @header = $line =~ m{
                ^
                (?<name> $HEADERMATCH)
                : \s*
                (?<value> \S .+)
                $
            }imx;
            if (not @header) {
                $invalid++;
                next LINE;
            }

            push @header, $line if defined $opt{max};

            $header[0] = canonical_name($header[0]) if $opt{fuzzy};

            next LINE if defined $grep_re and $line !~ $grep_re;

            $header[1] = simplify_value($mode, $header[0], $header[1]);

            if ($opt{'ignore-case'}) {
                $header[0] = lc $header[0];
                $header[1] = lc $header[1] if defined $header[1];
            }

            # A hidden value that is also the last element is dropped, so the
            # name alone is printed without a trailing ": ".
            pop @header if not defined $header[-1];

            push @headers, \@header;
        }

        next BLOCK if not @headers;

        if ($opt{debug} and $invalid) {
            say sprintf ': invalid lines in %s (%s)', $hash // 'block', $invalid;
        }

        HEADER:
        for my $header (@headers) {
            my ($name, $value, $original) = @$header;
            my $out = $original // join(': ', @$header);
            $out = "$hash $out" if defined $hash;

            if ($dedupe) {
                # One counter per distinct (name, simplified value) pair.
                my $counter = \$headercount{$name}{ $value // '' };
                my $excess  = $$counter++ - ($opt{min} // 0);
                next HEADER if $excess >= ($opt{max} || 1);
                next HEADER if $excess < 0;
                if ($opt{count}) {
                    # Only the first sample of a pair carries the (still
                    # growing) counter; extra samples get an empty count.
                    push @headercache, [ $out, $excess ? \undef : $counter ];
                    next HEADER;
                }
            }
            say $out;
        }

        last BLOCK;
    }
}

# With --count, output was held back until the totals are final.
for my $entry (@headercache) {
    my ($out, $count_ref) = @$entry;
    say $$count_ref // '', "\t", $out;
}

__END__

=head1 NAME

git-grep-footer - Find custom header lines in commit messages

=head1 SYNOPSIS

F<git> log --pretty=%b%x00 | F<git-grep-footer> [OPTIONS]

=head1 DESCRIPTION

Filters out header sections near the end of a commit body,
a common convention to list custom metadata such as
C<Signed-off-by> and C<Acked-by>.

Sections are identified by at least one leading keyword containing a dash
(or exceptionally recognised)
followed by a colon.

Input is a series of commit bodies, each terminated by a NUL character.
If a body contains a line consisting only of 4 to 40 hexadecimal digits,
that line is taken as the commit hash
and every line printed for that commit is prefixed by it.

=head1 OPTIONS

=over

=item -i, --ignore-case

Lowercases everything.

=item --fuzzy

Replaces spaces and underscores in attribute names by dashes,
and rewrites common misspellings of
C<Signed-off-by>, C<Acked-by>, C<Reviewed-by>, and C<Tested-by>
to those names.

=item -s, --simplify[=<rule>]

Modifies values to hide specific details.
Several different rules are supported:

=over

=item I<var> (default)

Replaces highly variable contents such as numbers, hashes, and addresses,
leaving only exceptional annotations as distinct text.
Attributes ending in I<-to> or I<-by> are assumed variable author names
and omitted entirely,
unless they contain a colon indicating possible attribute exceptions.

=item I<email>

Filters out author lines following the git signoff convention,
i.e. an e-mail address in angle brackets optionally preceded by a name.

=item I<all>

Values will be hidden entirely, so only attribute names remain.

=back

=item --grep=<pattern>

Only include lines matching the specified regular expression.
Case insensitivity can be disabled by prepending C<(?-i)>.

=item -u, --unique[=<threshold>]

Each match is only shown once,
optionally after it has already occurred a given amount of times.

=item -n, --show[=<limit>]

The original line is given for each match,
but simplifications still apply for duplicate determination.
Additional samples are optionally given up to the given maximum.

=item -c, --count

Prefixes (unique) lines by the number of occurrences.
Causes output to be buffered until all input has been read (obviously).

=item --debug

Reports the number of non-header lines found in a reported section.

=back

=head1 AUTHOR

Mischa POSLAWSKY

=head1 LICENSE

Copyright. All rights reserved.