From d51bf78a63e3f313f3e5d2de46ffe6ecbf2e93c4 Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Fri, 12 Jul 2019 04:00:57 +0200 Subject: [PATCH] page: strip metadata tags from article preface Search html body top for all html meta tags intended as page data, available from object $meta, but not shown directly as contents. Matches for description were returned already as teaser, but not removed. Similar overrides are now considered for title and image methods as well. --- article.inc.php | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/article.inc.php b/article.inc.php index a6a4ef8..3c65a97 100644 --- a/article.inc.php +++ b/article.inc.php @@ -17,6 +17,7 @@ function showdate($parts) class ArchiveArticle { public $raw, $preface, $title, $body; + public $meta = []; function __construct($path) { @@ -24,6 +25,15 @@ class ArchiveArticle $this->link = preg_replace('{(?:/index)?\.html$}', '', $path); if (file_exists($this->page)) { $this->raw = file_get_contents($this->page); + + if (preg_match_all('{ + \G \s* + }x', $this->raw, $meta)) { + $matchlen = array_sum(array_map('strlen', $meta[0])); + $this->raw = substr($this->raw, $matchlen); # delete matched contents + $this->meta = array_combine($meta[1], $meta[2]); # [property => content] + } + @list ($this->preface, $this->title, $this->body) = preg_split('{

(.*?)

\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE); } @@ -36,7 +46,7 @@ class ArchiveArticle function safetitle() { - return trim(strip_tags($this->title)); + return trim($this->meta['og:title'] ?? strip_tags($this->title)); } function name() { @@ -80,14 +90,12 @@ class ArchiveArticle } return $this->body; } + function teaser() { - if (preg_match('{ - ]* content="([^">]*)" - }x', $this->preface, $meta)) { - # prefer specific page description if found (assume before title) - #TODO: strip from body contents - return $meta[1]; + if ($override = @$this->meta['og:description']) { + # prefer specific page description if found in metadata + return $override; } if (preg_match('{ @@ -115,6 +123,11 @@ class ArchiveArticle } function image() { + if ($override = @$this->meta['og:image']) { + # prefer specific page image if found in metadata + return $override; + } + if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) { return $src[1]; } -- 2.30.0