From c0314d02f10951833a0879a0755010a998c4b76c Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Thu, 3 Feb 2011 19:56:28 +0100 Subject: [PATCH] convert non-utf8; avoid buffering; debug unmatched values --- git-grep-footer | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/git-grep-footer b/git-grep-footer index 1e60d4e..6952508 100755 --- a/git-grep-footer +++ b/git-grep-footer @@ -1,12 +1,35 @@ -#!/usr/bin/perl -0 -CS +#!/usr/bin/perl -0 -CO use 5.010; use strict; use warnings; +use Encode 'decode'; use Data::Dump 'pp'; +use Getopt::Long; + +GetOptions(\my %opt, + 'debug!', +) or die; + +local $| = 1; my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix; while (readline) { + s/(.+)\n//m; + my $hash = $1; + + # strip commit separator + chomp; + # skip expensive checks without potential identifier + m/:/ or next; + # try to parse as UTF-8 + eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()) }; + # if invalid, assume it's latin1 + $_ = decode(cp1252 => $_) if $@; + + my $prefix = 0; + my %attr; + BLOCK: for (reverse split /\n\n/) { my @headers; @@ -20,13 +43,20 @@ while (readline) { : \s* (? \S .+) $ - }imx or next LINE; + }imx or do { + $prefix++; + next LINE; + }; push @headers, \@header; } next BLOCK if not @headers; + if ($opt{debug} and $prefix) { + say "infix junk in commit $hash"; + } + for (@headers) { say join ': ', @$_; } -- 2.30.0