XXX: lookahead penalty
author Mischa POSLAWSKY <perl@shiar.org>
Sat, 14 Nov 2009 22:08:45 +0000 (23:08 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Sat, 14 Nov 2009 22:08:45 +0000 (23:08 +0100)
lib/List/Index.pm
t/10-ranges.t

index 339bd7d8710b6ede1e44fef6b207d28e0e26f766..4ce9c25b8dacb38693302421c1f2692220ed669d 100644 (file)
@@ -24,43 +24,52 @@ sub ranges {
 
        $pagesize = @$self / $pages;
        my $offset = $pagesize + .5;
-       my $penalty = 0;
+       my $shrunk = 0;
+       my $enlarged = 0;
        my @links = ('');
        while ($offset < @$self) {
                my $link = substr $self->[$offset], 0, $length;
                if ($context) {
-                       if ($offset > $context - 1 + $penalty) {
-                               # take a value slightly before the current offset
-                               my $before = $self->[$offset - $context - 1 + $penalty];
+                       my $penalty = 0;
+                       # take a value slightly before the current offset
+                       if ((my $before = $offset - $context + $shrunk) > 0) {
                                # see how much of it matches the current link
                                my $trim = 1;
-                               for my $match (split //, $before) {
+                               for my $match (split //, $self->[$before - 1]) {
                                        scalar $link =~ /\G\Q$match/g or last;
                                        $trim++;
                                }
                                # truncate link upto where the earlier value starts to differ
-                               substr($link, $trim) = '' unless $trim > length $link;
+                               if ($trim < length $link) {
+                                       substr($link, $trim) = '';
+                                       for (reverse $before .. $offset) {
+                                               $self->[$offset - $penalty] =~ /^\Q$link/ or last;
+                                               $penalty++;
+                                       }
+                               }
                        }
 
-                       $penalty = 0;
-                       if ($offset + $context < $#$self) {
-                               # take a value after the current offset
-                               my $after = $self->[$offset + $context];
+                       $shrunk = 0;
+                       # take a value after the current offset
+                       if ((my $after = $offset + $context - $enlarged) < $#$self) {
                                # see how much of it matches the current link
                                my $trim = 1;
-                               for my $match (split //, $after) {
+                               for my $match (split //, $self->[$after]) {
                                        scalar $link =~ /\G\Q$match/g or last;
                                        $trim++;
                                }
                                # use this link if it's shorter
                                if ($trim < length $link) {
-                                       $link = substr $after, 0, $trim;
+                                       $link = substr $self->[$after], 0, $trim;
+                                       # advance lookbehind offset on the next page
+                                       $penalty = 0;
                                        for ($offset .. $#$self) {
-                                               last if $self->[$offset + $penalty] =~ /^\Q$link/;
-                                               $penalty++;
+                                               last if $self->[$offset + $shrunk] =~ /^\Q$link/;
+                                               $shrunk++;
                                        }
                                }
                        }
+                       $enlarged = $penalty;
                }
 
                push @links, $link;
index cf3b0a43addfdcd94ccff68764967358e783f7ce..9cfce11ff29f10398d192d187afad08dfcd6deab 100644 (file)
@@ -37,7 +37,7 @@ subtest 'context' => sub {
        my $index = List::Index->new([qw(
                kkeg kl km kmlu knsy    koxb kpeo kuaa kuab kuac
                kuapa kuq kur kux kzb   lc lg lgu lgua lguc
-               lguq lgur lgus lgx lka  lkq lks lln llq llx
+               lguq lgur lgws lgx lka  lkq lks lln llq llx
        )]) or return;
        is_deeply(
                $index->ranges({ pagesize=>10, context=>0, length=>5 }),
@@ -109,13 +109,12 @@ subtest 'distribution' => sub {
                hnvtvpievbdlkrmb hs hvdvcqn hvn hyrybeur iaiaab ib ibavqyar idfniqvxpohbk idh
        )]) or return;
        is_deeply(
-               $index->ranges({ pagesize=>10, context=>6 }),
+               $index->ranges({ pagesize=>10, context=>8 }),
                [qw(-g h i-)],
                'large context'
        );
-{ local $TODO = '?';
        is_deeply(
-               $index->ranges({ pagesize=>10, context=>5 }),
+               $index->ranges({ pagesize=>10, context=>7 }),
                # after 2nd page is enlarged by lookbehind to 'h', limit subsequent lookahead
                # to prevent the page from getting too large (17 entries if forwarded to 'i')
                [qw(-g h-hm hn-)],
@@ -124,7 +123,6 @@ subtest 'distribution' => sub {
        # page #14 [gn-g] (8): gnihka gniub go gsearnrqns gtdvcxyt gwawkvmueovdjtfj gwoufolwcvmtueyg gysgphci
        # page #15 [h] (17): h habkdgifjfxoh hbbvjf hbqleexnqts hccgszftbaymfu hdaqzkow hdoeqwdmgqwaoya hfbegicieu hfmlpzzioqjbthz hj hkoysmws hmylu hnvtvpievbdlkrmb hsodfpkatk hvdvcqn hvn hyrybeurqtevjfmi
        # page #16 [i-ie] (5): i iaab ibiavqyar idfniqvxpohbk idh
-}
 };
 
 subtest 'context' => sub {