/**
 * Initialise the object from a page path.
 *
 * Stores the normalised path in $this->page, derives $this->link from it,
 * and then passes the page path to $this->raw().
 *
 * @param string $path  page path, optionally prefixed with "./"
 *                      (presumably relative to the site root -- confirm against callers)
 */
function __construct($path)
{
# drop a leading "./" (or a path that is just "."), leaving the bare page path
$this->page = preg_replace('{^\.(?:/|$)}', '', $path);
# the link is the page path without its ".html" suffix; a trailing "/index.html"
# is removed as a whole so directory indexes link to the directory itself.
# The replacement pattern also accepts "index" at the very start of the string,
# so a top-level "index.html" yields an empty link instead of "index".
- $this->link = preg_replace('{(?:/index)?\.html$}', '', $this->page);
+ $this->link = preg_replace('{(?:(?:/|^)index)?\.html$}', '', $this->page);
# NOTE(review): raw() is defined outside this view; presumably it loads the
# page contents for later use via $this->raw -- confirm
$this->raw($this->page);
}
return $override;
}
+ # paragraph contents following the page header if any
+ $offset = strpos($this->raw, '</h2>');
+ $offset = $offset ? $offset + 5 : 0;
if (preg_match('{
- </h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> \s* (.*?) </p>
- }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {
- # fallback paragraph contents following the page header
- if ($bodyp[1][1] < 512) {
- return $bodyp[1][0];
- }
- }
-
- # starting paragraph for documents without title (assumed simple/partial)
- if (strpos($this->raw, '<h2') === FALSE and preg_match('{
- \A (?: <div [^>]*> \s* )* <p> \s* (.*?) </p>
- }sx', $this->raw, $bodyp)) {
+ \G (?> \s+ | <aside\b.*?</aside> | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+ }sx', $this->raw, $bodyp, 0, $offset)) {
return $bodyp[1];
}
}