convert non-utf8; avoid buffering; debug unmatched values
[git-grep-footer.git] / git-grep-footer
index 026f5bcc718e68b336007b19e2da5334201289b6..6952508cdb94a4ca84c97857fe60fbbc8c481b5c 100755 (executable)
@@ -1,11 +1,66 @@
-#!/bin/sh
-git log --pretty=%b%x00 "$@" |
-perl -n0 -wMstrict -E '
+#!/usr/bin/perl -0 -CO
+use 5.010;
+use strict;
+use warnings;
+use Encode 'decode';
+use Data::Dump 'pp';
+use Getopt::Long;
+
+GetOptions(\my %opt,  # parse command-line switches into %opt
+       'debug!',  # negatable boolean flag: --debug / --nodebug
+) or die;  # abort when option parsing reports an error
+
+local $| = 1;  # flush the selected output handle after every write
+
+my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;  # header key: letters, then one or more "-word" parts or a literal " by"; case-insensitive
+
+while (readline) {  # one record per iteration; the -0 switch makes NUL the record separator
+       s/(.+)\n//m;  # cut the first non-empty line off the record
+       my $hash = $1;  # that line is used as the commit id in debug output (assumes the log format puts it first)
+
+       # strip commit separator (the trailing NUL record terminator)
+       chomp;
+       # skip expensive checks without potential identifier
+       m/:/ or next;
+       # try to parse as UTF-8
+       eval { $_ = decode(utf8   => $_, Encode::FB_CROAK()) };
+       # if invalid, fall back to cp1252 (the Windows variant of latin1)
+              $_ = decode(cp1252 => $_) if $@;
+
+       # number of non-blank lines scanned so far that did not parse as a header
+       my $prefix = 0;
+
+       BLOCK:
        for (reverse split /\n\n/) {
-               my @headers = grep m{
-                       ^ (?: [a-z]+ (?: (?:-\w+)+ | \ by ) ) : \s* \S
-               }imx, split /\n/ or next;
-               say for @headers;
-               last;
+               my @headers;  # [key, value] pairs found in this paragraph
+
+               LINE:
+               for (split /\n/) {
+                       next if not /\S/;
+                       my @header = m{
+                               ^
+                               (?<key> $HEADERMATCH)
+                               : \s*
+                               (?<val> \S .*)  # value may be a single character
+                               $
+                       }imx or do {
+                               $prefix++;  # not a header line: remember that junk was seen
+                               next LINE;
+                       };
+
+                       push @headers, \@header;
+               }
+
+               next BLOCK if not @headers;  # paragraphs are walked last to first, so this moves to the preceding one
+
+               if ($opt{debug} and $prefix) {  # junk was seen in this or a later paragraph
+                       say "infix junk in commit $hash";
+               }
+
+               for (@headers) {
+                       say join ': ', @$_;  # print as "key: value"
+               }
+
+               last BLOCK;  # only the last paragraph containing headers is reported
        }
-'
+}