class ArchiveArticle
{
- public $raw, $preface, $title, $body;
- public $meta = [];
+ public $raw, $title, $body; # file contents
+ public $meta = []; # head metadata properties
# Initialise the article from a file path: store the normalised path in
# ->page, derive ->link from it, then hand the page to raw().
function __construct($path)
{
# drop a leading "./" (or a path consisting only of ".") from $path
$this->page = preg_replace('{^\.(?:/|$)}', '', $path);
# link is the page without its trailing ".html"; the removed (-) pattern also
# dropped a preceding "/index", the added (+) pattern additionally drops an
# "index" at the very start, so a top-level "index.html" yields an empty link
- $this->link = preg_replace('{(?:/index)?\.html$}', '', $this->page);
+ $this->link = preg_replace('{(?:(?:/|^)index)?\.html$}', '', $this->page);
# NOTE(review): raw() is defined outside this view; presumably it loads the
# file contents for the page into ->raw. Confirm against its definition
$this->raw($this->page);
}
$this->meta = array_combine($meta[1], $meta[2]); # [property => content]
}
- @list ($this->preface, $this->title, $this->body) =
- preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+ // find significant contents
+ $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+ if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+ list (, $this->title, $this->body) = $titlematch;
+ }
}
function __get($col)
function story()
{
if ( preg_match('{
- \n (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
+ (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
}x', $this->body, $img, PREG_OFFSET_CAPTURE) ) {
# strip part after matching divider (image)
if (isset($img[1])) {
return $override;
}
+ # paragraph contents following the page header if any
if (preg_match('{
- </h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> \s* (.*?) </p>
- }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {
- # fallback paragraph contents following the page header
- if ($bodyp[1][1] < 256) {
- return $bodyp[1][0];
- }
- }
-
- # starting paragraph for documents without title (assumed simple/partial)
- if (strpos($this->raw, '<h2') === FALSE and preg_match('{
- \A (?: <div [^>]*> \s* )* <p> \s* (.*?) </p>
- }sx', $this->raw, $bodyp)) {
+ \G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+ }sx', $this->body, $bodyp, 0)) {
return $bodyp[1];
}
}