fix lookahead (reset \G, first match)
authorMischa POSLAWSKY <perl@shiar.org>
Sun, 15 Nov 2009 06:56:37 +0000 (07:56 +0100)
committerMischa POSLAWSKY <perl@shiar.org>
Sun, 15 Nov 2009 06:56:37 +0000 (07:56 +0100)
lib/List/Index.pm
t/10-ranges.t

index 6309d2e0d3413926b6063098444e17f471f796f6..f58e43e572be60f55bf05f972ea70ea97d5d7883 100644 (file)
@@ -60,18 +60,21 @@ sub ranges {
                        if ((my $after = $offset + $lookahead) < $#rows) {
                                # see how much of it matches the current link
                                my $trim = 1;
+                               pos $link = 0;
                                for my $match (split //, $rows[$after]) {
                                        scalar $link =~ /\G\Q$match/g or last;
                                        $trim++;
                                }
                                # use this link if it's shorter
                                if ($trim < length $link) {
-                                       $link = substr $rows[$after], 0, $trim;
-                                       # advance lookbehind offset on the next page
                                        $enlarged = 0;
                                        for ($offset + 1 .. $after) {
+                                               my $prefix = substr $rows[$_], 0, $trim;
+                                               # advance lookbehind offset on the next page
                                                $shrunk++;
-                                               last if $rows[$_] =~ /^\Q$link/;
+                                               next if $link =~ /^\Q$prefix/;
+                                               $link = $prefix;
+                                               last;
                                        }
                                }
                        }
index a3949453e05ef770b19bdadb884ff6a09feeb283..5c994c1946293bc36e1320c106b958a481b7d352 100644 (file)
@@ -40,8 +40,8 @@ subtest 'uniform alphanumeric' => sub {
        is_deeply(
                $index->ranges(\@data),
                [qw(
-                       -.
-                       .-bp bq-dm dn-fi fj-hf hg-i j-k l-m n-os ot-qp qq-sm sn-uj uk-wf wg-x y-
+                       -.. ..-...
+                       ...-bp bq-dm dn-fi fj-hf hg-i j-k l-m n-os ot-qp qq-sm sn-uj uk-wf wg-x y-
                )],
                'default ranges'
        );
@@ -57,7 +57,7 @@ subtest 'context' => sub {
        my @data = qw(
                kkeg kl km kmlu knsy    koxb kpeo kuaa kuab kuac
                kuapa kuq kur kux kzb   lc lg lgu lgua lguc
-               lguq lgur lgws lgx lka  lkq lks lln llq llx
+               lguq lgur lgus lgx lka  lkq lks lln llq llx
        );
        my $index = List::Index->new({ pagesize => 10 }) or return;
 
@@ -80,15 +80,12 @@ subtest 'context' => sub {
                [qw(-kup kuq-lgup lguq-)],
                'lookahead'
        );
-TODO: {
-       local $TODO = 'backtrack';
        is_deeply(
                $index->ranges(\@data, { context => 2 }),
                # allowed to advance to 'kur', but provides no benefits over 'kuq'
                [qw(-kup kuq-lgup lguq-)],
                'minimal lookahead'
        );
-}
        is_deeply(
                $index->ranges(\@data, { context => 3 }),
                # shorten 'kuap' to 'ku' because lookbehind is 'kp...'
@@ -151,23 +148,21 @@ subtest 'distribution' => sub {
 
 subtest 'modulo' => sub {
        plan tests => 2;
-       my @data = qw( a b ccb   ccd  cce gf gg   gh  i j );
+       my @data = qw( a b ccb   ccd  cce gf ggg   ggh  i j );
        my $index = List::Index->new({ pagesize => 4, context => 0 }) or return;
        # 10 entries at 4 per page requires 3 pages
        # so actual target page sizes should be 3,4,3 (not 4,4,2)
 
        is_deeply(
                $index->ranges(\@data),
-               [qw(-ccc ccd-gg gh-)],
+               [qw(-ccc ccd-ggg ggh-)],
                'uniform page sizes'
        );
-{ local $TODO = 'early lookbehind causing [c-gg]';
        is_deeply(
                $index->ranges(\@data, { context => 1 }),
-               [qw(-b c-h i-)],
+               [qw(-b c-gf gg-)],
                'context at new intervals'
        );
-}
 };
 
 subtest 'context' => sub {
@@ -182,13 +177,13 @@ subtest 'context' => sub {
        );
        is_deeply(
                $index->ranges(\@data, { context => undef }),
-               [qw(-a b c d e-)], #TODO
+               [qw(-baa. baa.-b c d e-)],
                'default context'  # context should be 1
        );
        is_deeply(
                $index->ranges(\@data, { context => 2 }),
                # first item equals second due to large context
-               [qw(-a b-c d e-)],
+               [qw(-ba bb-b c d e-)],
                'overlap'
        );
        is_deeply(