From 9cb1b773b16baa0a13c8c27b36b30e8509fd248c Mon Sep 17 00:00:00 2001
From: Mischa POSLAWSKY
Date: Tue, 28 Dec 2021 16:24:38 +0100
Subject: [PATCH] charset: legacy encodings petscii, atascii, msx, zx-spectrum

Sources now represented by unicode 13.0 legacy computing symbols.
Copied from Wikipedia.
---
Review note: the msx table listed "à" (already present at 0x85) at code
point 0xB0.  It is corrected to "Ã", which pairs with "ã" at 0xB1 in the
same way as the Ĩ/ĩ, Õ/õ and Ũ/ũ pairs that follow it.  Line breaks were
restored from the hunk headers and indentation is inferred (tabs); the
index hashes below predate the correction.

 charset-encoding.inc.pl | 45 +++++++++++++++++++++++++++++++++++++++++
 charset.plp             |  4 +++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/charset-encoding.inc.pl b/charset-encoding.inc.pl
index 1414f39..2da7614 100644
--- a/charset-encoding.inc.pl
+++ b/charset-encoding.inc.pl
@@ -218,6 +218,51 @@ use utf8;
 	'cp1026' => {inherit => ['cp37' => '40']},
 	'cp875' => {inherit => ['cp37' => '30']},
 
+	legacy => [qw( cp437 ATASCII PETSCII MSX ZX-Spectrum )],
+	'petscii' => {inherit => ['' => '40-7F+A0-BF'], setup => sub {
+		$_[0]->{table} = [(map {chr} 0 .. 0x3F), qw(
+			@ a b c d e f g h i j k l m n o p q r s t u v w x y z [ £ ] ↑ ←
+			🭹 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ┼ 🮌 │ 🮖 🮘
+			. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
+			  ▌ ▄ ▔ ▁ ▏ ▒ ▕ 🮏 🮙 🮇 ├ ▗ └ ┐ ▂ ┌ ┴ ┬ ┤ ▎ ▍ 🮈 🮂 🮃 ▃ ✓ ▖ ▝ ┘ ▘ ▚
+		)];
+	}},
+	'atascii' => {inherit => ['' => '0-1F+60-7F'], setup => sub {
+		$_[0]->{table} = [qw(
+			♥ ├ 🮇 ┘ ┤ ┐ ╱ ╲ ◢ ▗ ◣ ▝ ▘ 🮂 ▂ ▖ ♣ ┌ ─ ┼ • ▄ ▎ ┬ ┴ ▌ └ ␛ ↑ ↓ ← →
+			_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+			_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
+			♦ a b c d e f g h i j k l m n o p q r s t u v w x y z ♠ | 🢰 ◀ ▶
+		)];
+	}},
+	'zx-spectrum' => {
+		inherit => ['' => '50-8F'],
+		set => 'ascii',
+		replace => {
+			ord('^') => '↑',
+			ord('`') => '£',
+			0x7F => '© ▝▘▀▗▐▚▜▖▞▌▛▄▟▙█',
+		},
+	},
+	'msx' => {inherit => ['cp437' => '80-FF'], setup => sub {
+		$_[0]->{table} = [(map {chr} 0 .. 0x7F), qw(
+			Ç ü é â ä à å ç ê ë è ï î ì Ä Å É æ Æ ô ö ò û ù ÿ Ö Ü ¢ £ ¥ ₧ ƒ
+			á í ó ú ñ Ñ ª º ¿ ⌐ ¬ ½ ¼ ¡ « » Ã ã Ĩ ĩ Õ õ Ũ ũ IJ ij ¾ ∽ ◊ ‰ ¶ §
+			▂ ▚ ▆ 🮂 ▬ 🮅 ▎ ▞ ▊ 🮇 🮊 🮙 🮘 🭭 🭯 🭬 🭮 🮚 🮛 ▘ ▗ ▝ ▖ 🮖 Δ ‡ ω █ ▄ ▌ ▐ ▀
+			α ß Γ π Σ σ µ τ Φ Θ Ω δ ∞ ⌀ ∈ ∩ ≡ ± ≥ ≤ ⌠ ⌡ ÷ ≈ ° ∙ · √ ⁿ ² ■
+		)];
+	}},
+	'brascii' => {
+		inherit => ['' => 'D0-DF+F0-FF'],
+		setup => sub {
+			$_[0]->{table} = [(map {chr} 0 .. 0xFF)];
+		},
+		replace => {
+			0xD7 => 'Œ',
+			0xF7 => 'œ',
+		},
+	},
+
 	'' => {setup => sub {
 		my $row = shift;
 		$row->{offset} = delete $row->{startpoint};
diff --git a/charset.plp b/charset.plp
index d8527f0..ccbf145 100644
--- a/charset.plp
+++ b/charset.plp
@@ -55,6 +55,7 @@ print join " •\n", (
 		dos => 'DOS',
 		mac => 'Apple',
 		ebcdic => 'EBCDIC',
+		legacy => 'legacy',
 		$tablist[0] eq 'default' ? () : ('' => 'common'),
 	],
 	[
@@ -153,7 +154,7 @@ sub tabinput {
 		if (defined $row{table} or defined $row{cell}) {
 			$row{set} //= $input;
 		}
-		elsif ($row{set} = Encode::resolve_alias($input)) {
+		elsif ($row{set} = Encode::resolve_alias($charset->{set} // $input)) {
 			$row{offset} = delete $row{startpoint};
 			if ($charset->{varchar}) {
 				# array of possibly multiple characters per code point
@@ -169,6 +170,7 @@ sub tabinput {
 			$row{endpoint} -= $row{offset};
 
 			$visible->{ascii}++; # assume common base
+			$row{set} = $input if $charset->{set}; # base override
 		}
 		else {
 			Alert("Encoding $input unknown");
-- 
2.30.0