widget/sitemap: order pages hierarchically and alphabetically
[minimedit.git] / article.inc.php
index bc8dbff21be1fc99969daebb3bc0bbfa2ea5d2c4..a6a4ef800e9504f7440a5d34b42d774606c2fe96 100644 (file)
@@ -16,10 +16,17 @@ function showdate($parts)
 
 class ArchiveArticle
 {
+       public $raw, $preface, $title, $body;  # declared, so reads bypass __get; filled by the constructor when the page file exists
+
        function __construct($path)  # remember the page path and link; if the file exists, read it and split it at its first <h2>
        {
                $this->page = $path;
                $this->link = preg_replace('{(?:/index)?\.html$}', '', $path);  # link is the path minus a trailing .html or /index.html
+               if (file_exists($this->page)) {
+                       $this->raw = file_get_contents($this->page);  # complete file contents
+                       @list ($this->preface, $this->title, $this->body) =  # @ hides undefined-offset diagnostics: without a match only preface is set (to all of raw)
+                               preg_split('{<h2>(.*?)</h2>\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);  # limit 2 + captured delimiter: text before the first <h2>, its inner markup, the rest
+               }
        }
 
        function __get($col)
@@ -27,20 +34,6 @@ class ArchiveArticle
                return $this->$col = $this->$col();  # run method and cache
        }
 
-       function file()
-       {
-               if (!file_exists($this->page)) return;
-               return fopen($this->page, 'r');
-       }
-
-       function rawtitle()
-       {
-               return fgets($this->file);
-       }
-       function title()
-       {
-               return preg_replace('{<h2>(.*)</h2>\s*}', '\1', $this->rawtitle);
-       }
        function safetitle()
        {
                return trim(strip_tags($this->title));
@@ -74,12 +67,6 @@ class ArchiveArticle
                return showdate($this->dateparts);
        }
 
-       function body()
-       {
-               if (!$this->file) return;
-               $this->rawtitle;
-               return fread($this->file, filesize($this->page) ?: 1);
-       }
        function story()
        {
                if ( preg_match('{
@@ -93,14 +80,29 @@ class ArchiveArticle
                }
                return $this->body;
        }
-
-       function raw()
-       {
-               return $this->rawtitle . $this->body;
-       }
        function teaser()  # short summary for the page: meta description, else an early paragraph; nothing if neither is found
        {
-               if (preg_match('{<p>(.*?)</p>}s', $this->story, $bodyp)) {
+               if (preg_match('{
+                       <meta \s+ name="description" [^>]* content="([^">]*)"
+               }x', $this->preface, $meta)) {  # x flag: literal whitespace in the pattern is ignored; only the text before the title is searched
+                       # prefer specific page description if found (assume before title)
+                       #TODO: strip from body contents
+                       return $meta[1];  # the content attribute value
+               }
+
+               if (preg_match('{
+                       </h2> (?: \s+ | <p\sclass="nav\b.*?</p> | <div[^>]*> )* <p> \s* (.*?) </p>
+               }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {  # skips whitespace, nav paragraphs and <div> openers after </h2>; each match entry is [text, byte offset]
+                       # fallback paragraph contents following the page header
+                       if ($bodyp[1][1] < 256) {  # accept only if the paragraph text begins within the first 256 bytes of raw
+                               return $bodyp[1][0];  # paragraph contents without the surrounding <p> tags
+                       }
+               }
+
+               # starting paragraph for documents without title (assumed simple/partial)
+               if (strpos($this->raw, '<h2') === FALSE and preg_match('{
+                       \A <p> \s* (.*?) </p>
+               }sx', $this->raw, $bodyp)) {  # \A anchors at the very start of raw, so the file must open with <p>
                        return $bodyp[1];
                }
        }  # reaching here means no return was executed, so the result is null