Search the top of the HTML body for all <meta /> tags intended as page data;
these are made available from the object's $meta property, but are not shown
directly as contents. Matches for description were already returned as the
teaser, but were not removed from the body. Similar overrides are now
considered by the title and image methods as well.
class ArchiveArticle
{
public $raw, $preface, $title, $body;
class ArchiveArticle
{
public $raw, $preface, $title, $body;
function __construct($path)
{
function __construct($path)
{
$this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
if (file_exists($this->page)) {
$this->raw = file_get_contents($this->page);
$this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
if (file_exists($this->page)) {
$this->raw = file_get_contents($this->page);
+
+ if (preg_match_all('{
+ \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+ }x', $this->raw, $meta)) {
+ $matchlen = array_sum(array_map('strlen', $meta[0]));
+ $this->raw = substr($this->raw, $matchlen); # delete matched contents
+ $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+ }
+
@list ($this->preface, $this->title, $this->body) =
preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
}
@list ($this->preface, $this->title, $this->body) =
preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
}
- return trim(strip_tags($this->title));
+ return trim($this->meta['og:title'] ?? strip_tags($this->title));
- if (preg_match('{
- <meta \s+ name="description" [^>]* content="([^">]*)"
- }x', $this->preface, $meta)) {
- # prefer specific page description if found (assume before title)
- #TODO: strip from body contents
- return $meta[1];
+ if ($override = @$this->meta['og:description']) {
+ # prefer specific page description if found in metadata
+ return $override;
+ if ($override = @$this->meta['og:image']) {
+ # prefer specific page image if found in metadata
+ return $override;
+ }
+
if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
return $src[1];
}
if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
return $src[1];
}