- @list ($this->preface, $this->title, $this->body) =
- preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+ function raw($page)
+ {
+ if (!file_exists($page)) {
+ return;
+ }
+ $this->raw = file_get_contents($page);
+
+ if (preg_match_all('{
+ \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+ }x', $this->raw, $meta)) {
+ $matchlen = array_sum(array_map('strlen', $meta[0]));
+ $this->raw = substr($this->raw, $matchlen); # delete matched contents
+ $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+ }
+
+ // find significant contents
+ $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+ if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+ list (, $this->title, $this->body) = $titlematch;