page: strip metadata tags from article preface
[minimedit.git] / article.inc.php
index a6a4ef800e9504f7440a5d34b42d774606c2fe96..3c65a97d871b7fb8245023785a40f74646dc73ad 100644 (file)
@@ -17,6 +17,7 @@ function showdate($parts)
 class ArchiveArticle
 {
        public $raw, $preface, $title, $body;
+       public $meta = [];
 
        function __construct($path)
        {
@@ -24,6 +25,15 @@ class ArchiveArticle
                $this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
                if (file_exists($this->page)) {
                        $this->raw = file_get_contents($this->page);
+
+                       if (preg_match_all('{
+                               \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+                       }x', $this->raw, $meta)) {
+                               $matchlen = array_sum(array_map('strlen', $meta[0]));
+                               $this->raw = substr($this->raw, $matchlen); # delete matched contents
+                               $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+                       }
+
                        @list ($this->preface, $this->title, $this->body) =
                                preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
                }
@@ -36,7 +46,7 @@ class ArchiveArticle
 
        # Plain-text title for this article, with surrounding whitespace trimmed.
        # The removed line always stripped tags from the <h2> title; the added line
        # prefers the 'og:title' entry of $this->meta when that key is set and
        # non-null, and only falls back to the tag-stripped title otherwise.
        # Note that strip_tags() applies to the fallback only: a metadata title
        # is returned as stored, apart from the trim().
        function safetitle()
        {
-               return trim(strip_tags($this->title));
+               return trim($this->meta['og:title'] ?? strip_tags($this->title));
        }
        function name()
        {
@@ -80,14 +90,12 @@ class ArchiveArticle
                }
                return $this->body;
        }
+
        function teaser()
        {
-               if (preg_match('{
-                       <meta \s+ name="description" [^>]* content="([^">]*)"
-               }x', $this->preface, $meta)) {
-                       # prefer specific page description if found (assume before title)
-                       #TODO: strip from body contents
-                       return $meta[1];
+               if ($override = @$this->meta['og:description']) {
+                       # prefer specific page description if found in metadata
+                       return $override;
                }
 
                if (preg_match('{
@@ -115,6 +123,11 @@ class ArchiveArticle
        }
        function image()
        {
+               if ($override = @$this->meta['og:image']) {
+                       # prefer specific page image if found in metadata
+                       return $override;
+               }
+
                if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
                        return $src[1];
                }