page: ignore asides in article body (and hence in title, teaser)
[minimedit.git] / article.inc.php
index 98cd475b22194095fca6924c9ef4570b6eaed041..6c3ca935be15bf0318a377564ba8d136b4f8239b 100644 (file)
@@ -16,7 +16,7 @@ function showdate($parts)
 
 class ArchiveArticle
 {
-       public $raw, $preface, $title, $body;
+       public $raw, $title, $body; # file contents
        public $meta = [];
 
        function __construct($path)
@@ -41,8 +41,11 @@ class ArchiveArticle
                        $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
                }
 
-               @list ($this->preface, $this->title, $this->body) =
-                       preg_split('{<h2>(.*?)</h2>\s*}s', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+               // find significant contents
+               $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+               if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+                       list (, $this->title, $this->body) = $titlematch;
+               }
        }
 
        function __get($col)
@@ -129,11 +132,9 @@ class ArchiveArticle
                }
 
                # paragraph contents following the page header if any
-               $offset = strpos($this->raw, '</h2>');
-               $offset = $offset ? $offset + 5 : 0;
                if (preg_match('{
-                       \G (?> \s+ | <aside\b.*?</aside> | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
-               }sx', $this->raw, $bodyp, 0, $offset)) {
+                       \G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+               }sx', $this->body, $bodyp, 0)) {
                        return $bodyp[1];
                }
        }