class ArchiveArticle
{
+ public $raw, $title, $body; # file contents
+ public $meta = []; # head metadata properties
+
function __construct($path)
{
- $this->page = $path;
- $this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
+ $this->page = preg_replace('{^\.(?:/|$)}', '', $path); # file path without a leading "./" (a bare "." becomes empty)
+ $this->link = preg_replace('{(?:(?:/|^)index)?\.html$}', '', $this->page); # page minus ".html" and minus a final "index" component
+ $this->raw($this->page); # read and parse the file contents right away
}
- function __get($col)
+ function raw($page) # read $page and split its contents into meta, title and body
{
- return $this->$col = $this->$col(); # run method and cache
+ if (!file_exists($page)) {
+ return; # missing file: raw, title and body keep their NULL defaults
+ }
+ $this->raw = file_get_contents($page);
+
+ if (preg_match_all('{
+ \G <meta \s+ property="( [^"]+ )" \s+ content="( [^"]* )" > \s*
+ }x', $this->raw, $meta)) {
+ $matchlen = array_sum(array_map('strlen', $meta[0])); # byte length of all matched <meta> tags together
+ $this->raw = substr($this->raw, $matchlen); # delete matched contents (the run of tags at the start)
+ $this->meta = array_combine($meta[1], $meta[2]); # [property => content]
+ }
+
+ // find significant contents: drop <aside> blocks, then split at the first <h2>
+ $this->body = preg_replace('{<aside\b.*?</aside>}s', '', $this->raw);
+ if (preg_match('{<h2>(.*?)</h2>\s*(.*)}s', $this->body, $titlematch)) {
+ list (, $this->title, $this->body) = $titlematch; # title is the heading contents, body everything after it
+ }
}
- function file()
+ function __get($col) # lazy accessor: called when a property that is not set yet is read
{
- if (!file_exists($this->page)) return;
- return fopen($this->page, 'r');
+ return $this->$col = $this->$col(); # run the method named like the property and cache its result on the object
}
- function title()
+ function handler() # find the nearest directory, from the link upwards, that contains an index.php
{
- return preg_replace('{<h2>(.*)</h2>\s*}', '\1', fgets($this->file));
+ $path = $this->link;
+ $this->path = ''; # collects the trailing link components stripped off below
+ $this->restricted = FALSE; # becomes the last visited directory that has a .private file
+ while (TRUE) {
+ if (file_exists("$path/.private")) {
+ $this->restricted = $path;
+ }
+
+ if (file_exists("$path/index.php")) {
+ return $path; # handler directory found; path holds the remainder of the link
+ }
+
+ $up = strrpos($path, '/'); # FALSE when no separator is left
+ $this->path = substr($path, $up) . $this->path; # prepend the last component (slash included, or the whole rest if none)
+ $path = substr($path, 0, $up); # continue one directory up
+ if ($up === FALSE) {
+ break; # the whole link has been consumed
+ }
+ }
+ return; # no handler found
}
function safetitle()
{
- return trim(strip_tags($this->title));
+ # plain-text title: og:title metadata wins over the tag-stripped <h2> heading
+ # (title stays NULL when the page file is missing or has no heading,
+ # so fall back to '' instead of handing NULL to strip_tags)
+ return trim($this->meta['og:title'] ?? strip_tags($this->title ?? ''));
}
-
function name()
{
return $this->safetitle ?: $this->link;
{
return filemtime($this->page);
}
-
function lastiso()
{
return date(DATE_ATOM, $this->last);
array_shift($ymd);
return $ymd;
}
-
function dateiso()
{
return implode('-', $this->dateparts()) . 'T12:00:00+02:00'; # date parts joined by dashes, plus a fixed noon time with a hard-coded +02:00 offset
}
-
function date()
{
return showdate($this->dateparts); # showdate() is defined elsewhere; presumably renders the date parts for display (confirm)
}
- function body()
+ function story() # body text up to the first leading image or <hr /> divider
{
- $this->title;
- $rest = fread($this->file, filesize($this->page));
if ( preg_match('{
- \n (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
- }x', $rest, $img, PREG_OFFSET_CAPTURE) ) {
+ (?: < (?: p | figure [^>]* ) >\s* )+ (<img\ [^>]*>) | \n <hr\ />
+ }x', $this->body, $img, PREG_OFFSET_CAPTURE) ) {
+ # strip part after matching divider (image or <hr />); group 1 is only set for an image
if (isset($img[1])) {
$this->img = $img[1][0];
}
- return substr($rest, 0, $img[0][1]);
+ return substr($this->body, 0, $img[0][1]); # cut at the byte offset where the divider match starts
}
- return $rest;
+ return $this->body; # no divider: the whole body
}
function teaser()
{
- if (preg_match('{<p>(.*?)</p>}s', $this->body, $bodyp)) {
+ # short description of the page: og:description metadata if present,
+ # otherwise the leading paragraph of the body; NULL when neither exists
+ if ($override = $this->meta['og:description'] ?? NULL) {
+ # prefer specific page description if found in metadata
+ return $override;
+ }
+
+ # paragraph contents following the page header if any
+ # (body is NULL for a missing page file, hence the string cast)
+ if (preg_match('{
+ \G (?> \s+ | <div [^>]*> | \[\[[^]]*\]\] )* <p> \s* (.*?) </p>
+ }sx', (string) $this->body, $bodyp, 0)) {
return $bodyp[1];
}
}
function img()
{
$this->img = NULL;
- $this->body;
+ $this->story; # reading story runs story() (unless already cached), which stores the <img> tag it finds in img
return $this->img;
}
-
function image()
{
+ # image URL for the page; NULL when none is known
+ if ($override = $this->meta['og:image'] ?? NULL) {
+ # prefer specific page image if found in metadata
+ return $override;
+ }
+
+ # otherwise take the src attribute of the image tag kept in img
if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
return $src[1];
}
}
-
function thumb($size = '300x')
{
if (!$this->image or $this->image[0] !== '/') return;
);
}
}
+
+# recursive search for page files (**/*.html) below a directory
+class PageSearch
+{
+ # filtered recursive iterator over the candidate files; declared explicitly
+ # so that no dynamic property is created (deprecated since PHP 8.2)
+ public $iterator;
+
+ # set up the filtered directory walk starting at $path
+ function __construct($path = '.')
+ {
+ $this->iterator = new RecursiveCallbackFilterIterator(
+ new RecursiveDirectoryIterator($path),
+ function ($current) {
+ if ($current->getFilename()[0] === '.') {
+ # skip hidden files and directories (also the . and .. entries)
+ return FALSE;
+ }
+ if ($current->isLink()) {
+ # ignore symlinks, original contents only
+ return FALSE;
+ }
+ if ($current->isDir()) {
+ # traverse subdirectories unless untracked in any amount
+ return !file_exists("$current/.gitignore");
+ }
+ # match **/*.html but not *.inc.html; strict boolean for the filter
+ return preg_match('/(?<!\.inc)\.html$/', $current->getFilename()) === 1;
+ }
+ );
+ }
+
+ # list the matching files as [pathname => sort key], ordered by sort key;
+ # the sort key is the pathname with a trailing /index.html removed
+ function files()
+ {
+ # order alphabetically by link
+ $dir = iterator_to_array(new RecursiveIteratorIterator($this->iterator));
+ array_walk($dir, function (&$row, $name) {
+ # prepare values for sorting (directory index first)
+ $row = preg_replace('{/index\.html$}', '', $name);
+ });
+ asort($dir);
+ return $dir;
+ }
+}