foto: fix colon separator after empty root title

[minimedit.git] / article.inc.php
diff --git a/article.inc.php b/article.inc.php

index c6f3be9680d34b5d331cbf1c52c0c2f342add3d1..93bbe3b79f46dfb11b6c631d053030a164590c5a 100644 (file)
--- a/article.inc.php
+++ b/article.inc.php
@@ -16,33 +16,71 @@ function showdate($parts)
  
  class ArchiveArticle
  {
+       public $raw, $title, $body; # file contents
+       public $meta = [];  # head metadata properties
+
         function __construct($path)
         {
-               $this->page = $path;
-               $this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
+               $this->page = preg_replace('{^\.(?:/|$)}', '', $path);
+               $this->link = preg_replace('{(?:(?:/|^)index)?\.html$}', '', $this->page);
+               $this->raw($this->page);
         }
  
-       function __get($col)
+       function raw($page)
         {
-               return $this->$col = $this->$col();  # run method and cache
+               if (!file_exists($page)) {
+                       return;
+               }
+               $this->raw = file_get_contents($page);
+
+               if (preg_match_all('{
+                       \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+               }x', $this->raw, $meta)) {
+                       $matchlen = array_sum(array_map('strlen', $meta[0]));
+                       $this->raw = substr($this->raw, $matchlen); # delete matched contents
+                       $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+               }
+
+               // find significant contents
+               $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+               if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+                       list (, $this->title, $this->body) = $titlematch;
+               }
         }
  
-       function file()
+       function __get($col)
         {
-               if (!file_exists($this->page)) return;
-               return fopen($this->page, 'r');
+               return $this->$col = $this->$col();  # run method and cache
         }
  
-       function title()
+       function handler()
         {
-               return preg_replace('{<h2>(.*)</h2>\s*}', '\1', fgets($this->file));
+               $path = $this->link;
+               $this->path = '';
+               $this->restricted = FALSE;
+               while (TRUE) {
+                       if (file_exists("$path/.private")) {
+                               $this->restricted = $path;
+                       }
+
+                       if (file_exists("$path/index.php")) {
+                               return $path;
+                       }
+
+                       $up = strrpos($path, '/');
+                       $this->path = substr($path, $up) . $this->path;
+                       $path = substr($path, 0, $up);
+                       if ($up === FALSE) {
+                               break;
+                       }
+               }
+               return;
         }
  
         function safetitle()
         {
-               return trim(strip_tags($this->title));
+               return trim($this->meta['og:title'] ?? strip_tags($this->title));
         }
-
         function name()
         {
                 return $this->safetitle ?: $this->link;
@@ -52,7 +90,6 @@ class ArchiveArticle
         {
                 return filemtime($this->page);
         }
-
         function lastiso()
         {
                 return date(DATE_ATOM, $this->last);
@@ -64,35 +101,40 @@ class ArchiveArticle
                 array_shift($ymd);
                 return $ymd;
         }
-
         function dateiso()
         {
                 return implode('-', $this->dateparts()) . 'T12:00:00+02:00';
         }
-
         function date()
         {
                 return showdate($this->dateparts);
         }
  
-       function body()
+       function story()
         {
-               $this->title;
-               $rest = fread($this->file, filesize($this->page));
                 if ( preg_match('{
-                       \n (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
-               }x', $rest, $img, PREG_OFFSET_CAPTURE) ) {
+                       (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
+               }x', $this->body, $img, PREG_OFFSET_CAPTURE) ) {
+                       # strip part after matching divider (image)
                         if (isset($img[1])) {
                                 $this->img = $img[1][0];
                         }
-                       return substr($rest, 0, $img[0][1]);
+                       return substr($this->body, 0, $img[0][1]);
                 }
-               return $rest;
+               return $this->body;
         }
  
         function teaser()
         {
-               if (preg_match('{<p>(.*?)</p>}s', $this->body, $bodyp)) {
+               if ($override = @$this->meta['og:description']) {
+                       # prefer specific page description if found in metadata
+                       return $override;
+               }
+
+               # paragraph contents following the page header if any
+               if (preg_match('{
+                       \G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+               }sx', $this->body, $bodyp, 0)) {
                         return $bodyp[1];
                 }
         }
@@ -100,17 +142,20 @@ class ArchiveArticle
         function img()
         {
                 $this->img = NULL;
-               $this->body;
+               $this->story;
                 return $this->img;
         }
-
         function image()
         {
+               if ($override = @$this->meta['og:image']) {
+                       # prefer specific page image if found in metadata
+                       return $override;
+               }
+
                 if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
                         return $src[1];
                 }
         }
-
         function thumb($size = '300x')
         {
                 if (!$this->image or $this->image[0] !== '/') return;
@@ -120,3 +165,46 @@ class ArchiveArticle
                 );
         }
  }
+
+# Recursively collects *.html pages (except *.inc.html) below a directory.
+class PageSearch
+{
+       public $iterator;  # filtered recursive directory iterator, set by the constructor
+
+       # $path: directory to search (defaults to the current directory)
+       function __construct($path = '.')
+       {
+               $this->iterator = new RecursiveCallbackFilterIterator(
+                       new RecursiveDirectoryIterator($path),
+                       function ($current) {
+                               if ($current->getFilename()[0] === '.') {
+                                       # skip hidden files and directories (this includes the . and .. entries)
+                                       return FALSE;
+                               }
+                               if ($current->isLink()) {
+                                       # ignore symlinks, original contents only
+                                       return FALSE;
+                               }
+                               if ($current->isDir()) {
+                                       # descend into subdirectories unless they contain a .gitignore file
+                                       return !file_exists("$current/.gitignore");
+                               }
+                               # match **/*.html except *.inc.html
+                               # (preg_match yields an int or FALSE; compare to return a real boolean)
+                               return preg_match('/(?<!\.inc)\.html$/', $current->getFilename()) === 1;
+                       }
+               );
+       }
+
+       # Returns the matched files keyed by iterator key, each value being that key
+       # with a trailing /index.html removed; ordered alphabetically by value.
+       function files()
+       {
+               $dir = iterator_to_array(new RecursiveIteratorIterator($this->iterator));
+               array_walk($dir, function (&$row, $name) {
+                       # prepare values for sorting (directory index first)
+                       $row = preg_replace('{/index\.html$}', '', $name);
+               });
+               asort($dir);
+               return $dir;
+       }
+}