#!/usr/bin/perl
#
# Print the trailer-style headers ("Some-Key: value", "Foo by: value") found
# in commit records.
#
# Input is read from the files named on the command line, or from STDIN, as
# NUL-terminated records.  The first non-empty line of a record is taken as
# the commit identifier; the rest is the message.  (Presumably the input is
# produced by something like `git log -z` with a hash-then-body format --
# confirm against the caller.)
#
# The message is split into paragraphs on blank lines and scanned from the
# last paragraph backwards.  The first paragraph (from the end) that contains
# at least one header line has its headers printed as "key: value", one per
# line, and the rest of the message is ignored.
#
# Options:
#   --debug   also report commits in which non-header lines were seen while
#             scanning for the header paragraph.

use 5.010;
use strict;
use warnings;

use Encode 'decode';
use Data::Dump 'pp';
use Getopt::Long;

GetOptions(
    \my %opt,
    'debug!',
) or die "Usage: $0 [--[no-]debug] [FILE ...]\n";

# Unbuffered output, so results appear as soon as a commit is processed.
local $| = 1;

# Records are NUL-terminated.  This replaces the former "-0" shebang switch.
local $/ = "\0";

# Emit UTF-8 on STDOUT.  This replaces the former "-CO" shebang switch, which
# makes perl abort with "Too late for -CO" when the script is started as
# `perl script` instead of being executed directly.
binmode STDOUT, ':utf8';

# A header name: a word followed by one or more "-word" parts, or by " by".
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;

# A complete header line: name, colon, optional whitespace, value.
# In list context a successful match returns (name, value).
# NOTE: the value pattern requires at least two characters, so a header with
# a single-character value is not recognised.
my $HEADER_LINE = qr{
    ^
    (?<key>   $HEADERMATCH )
    : \s*
    (?<value> \S .+ )
    $
}imx;

COMMIT:
while ( defined( my $record = <> ) ) {

    # The first non-empty line is the commit identifier; remove it from the
    # record.  Fall back to a placeholder instead of reading an unset $1 when
    # the record contains no such line.
    my $hash = $record =~ s/(.+)\n// ? $1 : '(unknown)';

    # strip commit separator
    chomp $record;

    # skip expensive checks without potential identifier
    next COMMIT if $record !~ /:/;

    # Try to parse as UTF-8.  LEAVE_SRC guarantees that $record is untouched
    # if decoding fails, so the fallback below sees the original bytes.
    my $message = eval {
        decode( utf8 => $record, Encode::FB_CROAK() | Encode::LEAVE_SRC() );
    };

    # if invalid, assume it's Windows-1252
    $message = decode( cp1252 => $record ) if not defined $message;

    # Number of non-header lines seen so far while scanning backwards.
    my $junk_lines = 0;

    BLOCK:
    for my $paragraph ( reverse split /\n\n/, $message ) {
        my @headers;

        LINE:
        for my $line ( split /\n/, $paragraph ) {
            next LINE if $line !~ /\S/;

            my @header = $line =~ $HEADER_LINE;
            if ( !@header ) {
                $junk_lines++;
                next LINE;
            }

            push @headers, \@header;
        }

        # Keep looking in earlier paragraphs until one has headers.
        next BLOCK if not @headers;

        if ( $opt{debug} and $junk_lines ) {
            say "infix junk in commit $hash";
        }

        for my $header (@headers) {
            say join ': ', @{$header};
        }

        # Only the last header-bearing paragraph is reported.
        last BLOCK;
    }
}