X-Git-Url: http://git.shiar.net/minimedit.git/blobdiff_plain/022d44c8ddf33f855e6ed33cae6ad9ae103e074d..251616969893b447852aad81a4e68be6853e202e:/article.inc.php
diff --git a/article.inc.php b/article.inc.php
index 5de1492..3c65a97 100644
--- a/article.inc.php
+++ b/article.inc.php
@@ -16,10 +16,27 @@ function showdate($parts)
class ArchiveArticle
{
+ public $raw, $preface, $title, $body; # page contents (after leading metadata is cut off) and the parts the constructor splits them into
+ public $meta = []; # [property => content] pairs collected by the constructor; stays empty if none matched
+
function __construct($path)
{
$this->page = $path;
$this->link = preg_replace('{(?:/index)?\.html$}', '', $path);
+ if (file_exists($this->page)) { # when the page file is missing, raw/preface/title/body stay null
+ $this->raw = file_get_contents($this->page);
+
+ if (preg_match_all('{
+ \G \s*
+ }x', $this->raw, $meta)) {
+ $matchlen = array_sum(array_map('strlen', $meta[0])); # total byte length of all full matches
+ $this->raw = substr($this->raw, $matchlen); # cut that many leading bytes (the matched contents) off raw
+ $this->meta = array_combine($meta[1], $meta[2]); # [property => content], from capture groups 1 and 2
+ }
+
+ @list ($this->preface, $this->title, $this->body) = # @ silences the warnings list() raises when the split returns fewer than three parts
+ preg_split('{
(.*?)
\s*}', $this->raw, 2, PREG_SPLIT_DELIM_CAPTURE);
+ }
}
function __get($col)
@@ -27,22 +44,10 @@ class ArchiveArticle
return $this->$col = $this->$col(); # run method and cache
}
- function file()
- {
- if (!file_exists($this->page)) return;
- return fopen($this->page, 'r');
- }
-
- function title()
- {
- return preg_replace('{(.*)
\s*}', '\1', fgets($this->file));
- }
-
function safetitle()
{
- return trim(strip_tags($this->title));
+ return trim($this->meta['og:title'] ?? strip_tags($this->title)); # og:title metadata if set, otherwise the tag-stripped title
}
-
function name()
{
return $this->safetitle ?: $this->link;
@@ -52,7 +57,6 @@ class ArchiveArticle
{
return filemtime($this->page);
}
-
function lastiso()
{
return date(DATE_ATOM, $this->last);
@@ -64,12 +68,10 @@ class ArchiveArticle
array_shift($ymd);
return $ymd;
}
-
function dateiso()
{
return implode('-', $this->dateparts()) . 'T12:00:00+02:00';
}
-
function date()
{
return showdate($this->dateparts);
@@ -77,23 +79,38 @@ class ArchiveArticle
function story()
{
- $this->title;
- $rest = fread($this->file, filesize($this->page));
if ( preg_match('{
\n (?: < (?: p | figure [^>]* ) >\s* )+ (]*>) | \n
- }x', $rest, $img, PREG_OFFSET_CAPTURE) ) {
+ }x', $this->body, $img, PREG_OFFSET_CAPTURE) ) {
# strip part after matching divider (image)
if (isset($img[1])) {
$this->img = $img[1][0];
}
- return substr($rest, 0, $img[0][1]);
+ return substr($this->body, 0, $img[0][1]); # body up to the byte offset where the divider match starts
}
- return $rest;
+ return $this->body; # no divider matched: the whole body
}
function teaser()
{
- if (preg_match('{(.*?)
}s', $this->story, $bodyp)) {
+ if ($override = @$this->meta['og:description']) {
+ # prefer the og:description metadata when present and truthy (@ hides the missing-key warning)
+ return $override;
+ }
+
+ if (preg_match('{
+ (?: \s+ | |
]*> )*
\s* (.*?)
+ }sx', $this->raw, $bodyp, PREG_OFFSET_CAPTURE)) {
+ # fallback: captured paragraph contents, accepted only if the capture starts within the first 256 bytes of raw
+ if ($bodyp[1][1] < 256) {
+ return $bodyp[1][0];
+ }
+ }
+
+ # starting paragraph for documents without title (assumed simple/partial)
+ if (strpos($this->raw, '
\s* (.*?)
+ }sx', $this->raw, $bodyp)) {
return $bodyp[1];
}
}
@@ -104,14 +121,17 @@ class ArchiveArticle
$this->story;
return $this->img;
}
-
function image()
{
+ if ($override = @$this->meta['og:image']) {
+ # prefer the og:image metadata when present and truthy (@ hides the missing-key warning)
+ return $override;
+ }
+
if ( preg_match('/\bsrc="([^"]*)"/', $this->img, $src) ) {
return $src[1];
}
}
-
function thumb($size = '300x')
{
if (!$this->image or $this->image[0] !== '/') return;