-#!/bin/sh
-git log --pretty=%b%x00 "$@" |
-perl -n0 -wMstrict -E '
+#!/usr/bin/perl -0 -CO
+use 5.010;
+use strict;
+use warnings;
+use Encode 'decode';
+use Data::Dump 'pp';
+use Getopt::Long;
+
+GetOptions(\my %opt,
+ 'debug!',
+ 'simplify|s:s',
+ 'unique|u!',
+ 'ignore-case|i!',
+) or die;
+
+local $| = 1;
+
+my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
+
+while (readline) {
+ s/(.+)\n//m;
+ my $hash = $1;
+
+ # strip commit seperator
+ chomp;
+ # skip expensive checks without potential identifier
+ m/:/ or next;
+ # try to parse as UTF-8
+ eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()) };
+ # if invalid, assume it's latin1
+ $_ = decode(cp1252 => $_) if $@;
+
+ my $prefix = 0;
+ my %attr;
+
+ BLOCK:
for (reverse split /\n\n/) {
- my @headers = grep m{
- ^ (?: [a-z]+ (?: (?:-\w+)+ | \ by ) ) : \s* \S
- }imx, split /\n/ or next;
- say for @headers;
- last;
+ my @headers;
+
+ LINE:
+ for (split /\n/) {
+ next if not /\S/;
+ my @header = m{
+ ^
+ (?<key> $HEADERMATCH)
+ : \s*
+ (?<val> \S .+)
+ $
+ }imx or do {
+ $prefix++;
+ next LINE;
+ };
+
+ given ($opt{simplify} // 'no') {
+ when ('strict') {
+ $header[1] =~ s{
+ \A
+ (?: [^:]+ )?
+ < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
+ \Z
+ }{<...>}imsx;
+ }
+ when (['text', '']) {
+ when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
+ pop @header;
+ }
+ for ($header[1]) {
+ s{\b (https?)://\S+ }{[$1]}gmsx; # url
+ s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx; # address
+ s{\b [0-9]+ \b}{[num]}gmsx; # number
+ s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx; # hash
+ }
+ }
+ when (['all', 'any']) {
+ pop @header;
+ }
+ when ('no') {
+ }
+ default {
+ die "Unknown simplify option: '$_'\n";
+ }
+ }
+
+ if ($opt{'ignore-case'}) {
+ $_ = lc for @header;
+ }
+
+ push @headers, \@header;
+ }
+
+ next BLOCK if not @headers;
+
+ if ($opt{debug} and $prefix) {
+ say "infix junk in commit $hash";
+ }
+
+ for (@headers) {
+ if ($opt{unique}) {
+ state $seen;
+ next if $seen->{ $_->[0] }->{ $_->[1] // '' }++;
+ }
+ say join ': ', @$_;
+ }
+
+ last BLOCK;
}
-'
+}