class ArchiveArticle
{
- public $raw, $preface, $title, $body;
- public $meta = [];
+ public $raw, $title, $body; # file contents
+ public $meta = []; # head metadata properties
function __construct($path)
{
$this->meta = array_combine($meta[1], $meta[2]); # [property => content]
}
- @list ($this->preface, $this->title, $this->body) =
- preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+ // strip <aside> blocks, then (if an <h2> exists) take it as the title and what follows as the body
+ $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+ if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+ list (, $this->title, $this->body) = $titlematch;
+ }
}
function __get($col)
}
# paragraph contents following the page header if any
- $offset = strpos($this->raw, '</h2>');
- $offset = $offset ? $offset + 5 : 0;
if (preg_match('{
- \G (?> \s+ | <p\sclass="nav\b.*?</p> | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
- }sx', $this->raw, $bodyp, 0, $offset)) {
+ \G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+ }sx', $this->body, $bodyp, 0)) {
return $bodyp[1];
}
}