From: Mischa POSLAWSKY Date: Mon, 11 Feb 2008 02:44:29 +0000 (+0000) Subject: EscapeHTML function X-Git-Tag: 3.23~4 X-Git-Url: http://git.shiar.net/perl/plp/.git/commitdiff_plain/958d374e19b67a3c68050a1dd29f7a10a44c74c9 EscapeHTML function Another function to encode html/xml, this time (imho) the "right" way by only quoting reserved characters. The provided Entity() function would often be unusable because of its overcomplete whitespace formatting. This is a feature which I (have to) set up manually in many cases, which seems very unfriendly for a module optimised for outputting HTML. According to personal preferences, it only substitutes a minimal set of entities: - & and < (both required to prevent html interpretation) - > (for xml or otherwise to ease document parsing) - " (to make it usable in attribute values). Single quotes (&apos; or &#39;) are left unquoted, assuming attributes are always in double quotes (no reason to do otherwise). Unlike Entity, it only handles a single argument, to allow for possible options in the future (hopefully supporting a custom range of unsafe chars). It also dies on failure (like trying to change read-only input), because that is a user mistake which should not go unnoticed. The name was devised to be more consistent with other environments (also anticipating new URI encoding and decoding): * php htmlspecialchars html_entity_decode rawurlencode urldecode * javascript encodeURIComponent decodeURIComponent * ruby CGI escapeHTML unescapeHTML escape unescape - CGI::Simple::Util escapeHTML unescapeHTML escape unescape - CGI::Util (simple_escape) escape unescape - HTML::Mason::Escapes basic_html_escape url_escape - HTML::Tiny entity_encode url_encode url_decode * URI::Escape uri_escape_utf8 uri_unescape * XML::Quote xml_quote xml_dequote - PLP (legacy) Entity EncodeURI DecodeURI - PLP (redesign) EscapeHTML UnescapeHTML EscapeURI UnescapeURI HTML: - Escape etc used nearly everywhere (so the obvious choice). 
- Decode is only used by php, but uglily and inconsistently. - Quote seems most appropriate linguistically, but only used in one minority module. URIs: - Encode etc common in php and javascript. - Escape etc used by ruby and several perl modules (including URI::Escape), and is still familiar to javascript users - URI used in all significant environments; URL only in minor modules. --- diff --git a/MANIFEST b/MANIFEST index bc70809..ac86acd 100644 --- a/MANIFEST +++ b/MANIFEST @@ -19,6 +19,7 @@ lib/PLP/HowTo.pod bin/plp.cgi bin/plp.fcgi t/10-functions.t +t/15-escape.t t/50-cgi.t t/91-meta.t t/92-pod.t diff --git a/lib/PLP/Functions.pm b/lib/PLP/Functions.pm index 8c3886f..2a4bb99 100644 --- a/lib/PLP/Functions.pm +++ b/lib/PLP/Functions.pm @@ -4,10 +4,12 @@ use strict; use warnings; use base 'Exporter'; +use Carp; use Fcntl qw(:flock); -our $VERSION = '1.00'; +our $VERSION = '1.01'; our @EXPORT = qw/Entity DecodeURI EncodeURI Include include PLP_END + EscapeHTML AddCookie ReadFile WriteFile AutoURL Counter exit/; sub Include ($) { @@ -35,6 +37,19 @@ sub PLP_END (&) { push @PLP::END, shift; } +sub EscapeHTML { + @_ == 1 or croak "Unsupported parameters given to EscapeHTML"; + unshift @_, shift if defined wantarray; # dereference if not void + for ($_[0]) { + defined or next; + s/&/&amp;/g; + s/"/&quot;/g; + s/</&lt;/g; + s/>/&gt;/g; + } + return $_[0]; +} + sub Entity (@) { my $ref = defined wantarray ? [@_] : \@_; for (@$ref) { @@ -210,17 +225,36 @@ Adds a piece of code that is executed when at the end of the PLP document. This You should use this function instead of Perl's built-in C<END> blocks, because those do not work properly with mod_perl. +=item EscapeHTML STRING + +Replaces HTML syntax characters by HTML entities, so the text can be output safely. +You should always use this when displaying user input (or database output), +to avoid cross-site-scripting vurnerabilities. + +In void context, B<changes> the value of the given variable. + + <: EscapeHTML($user_input); print "
$user_input
"; :> + +In other contexts, returns the changed version. + + + +Be warned that single quotes are not substituted, so always use double quotes for attributes. +Also does not convert whitespace for formatted output; use Entity() for that. + +To escape high-bit characters as well, refer to L<HTML::Entities>. + =item Entity LIST -Replaces HTML syntax characters by HTML entities, so they can be displayed literally. You should always use this when displaying user input (or database output), to avoid cross-site-scripting vurnerabilities. +Formats given arguments for literal display in HTML documents. +Similar to EscapeHTML(), but also preserves newlines and consecutive spaces +using corresponding C<< <br> >> and C<&nbsp;> respectively. In void context, B<changes> the values of the given variables. In other contexts, returns the changed versions. - <: print Entity($user_input); :> + <: print '

' . Entity($user_input) . '

'; :> -Be warned that this function also HTMLizes consecutive whitespace and newlines (using &nbsp; and <br> respectively). -For simple escaping, use L<XML::Quote>. -To escape high-bit characters as well, use L<HTML::Entities>. +Inside attributes, always use EscapeHTML() instead. =item EncodeURI LIST @@ -235,7 +269,7 @@ Note that the following reserved characters are I<not> percent-encoded, even tho / ? : @ $ This should be safe for escaping query values (as in the example above), -but it may be a better idea to use L<URI::Escape> instead. +but otherwise it may be a better idea to use L<URI::Escape> instead. =item DecodeURI LIST diff --git a/plp.vim b/plp.vim index bc0fe83..9605a67 100644 --- a/plp.vim +++ b/plp.vim @@ -1,7 +1,7 @@ " Vim syntax file " Language: PLP (Perl in HTML) " Maintainer: Shiar -" Last Change: 2002 May 20 +" Last Change: 2009 October 19 " Cloned From: aspperl.vim " Author: Juerd @@ -34,7 +34,7 @@ syn cluster htmlPreproc add=PLPperlblock syn keyword perlControl PLP_END syn keyword perlStatementInclude include Include syn keyword perlStatementFiles ReadFile WriteFile Counter -syn keyword perlStatementScalar Entity AutoURL DecodeURI EncodeURI +syn keyword perlStatementScalar EscapeHTML Entity AutoURL DecodeURI EncodeURI syn cluster PLPperlcode contains=perlStatement.*,perlFunction,perlOperator,perlVarPlain,perlVarNotInMatches,perlShellCommand,perlFloat,perlNumber,perlStringUnexpanded,perlString,perlQQ,perlControl,perlConditional,perlRepeat,perlComment,perlPOD,perlHereDoc,perlPackageDecl,perlElseIfError,perlFiledescRead,perlMatch diff --git a/t/10-functions.t b/t/10-functions.t index c921339..ea76863 100644 --- a/t/10-functions.t +++ b/t/10-functions.t @@ -4,6 +4,8 @@ use Test::More tests => 6; BEGIN { use_ok('PLP::Functions') } +# legacy + is( Entity(q{<a test="'&'"/>
}), "&lt;a test=&quot;'&amp;'&quot;/&gt;", diff --git a/t/15-escape.t b/t/15-escape.t new file mode 100644 index 0000000..cbc9f90 --- /dev/null +++ b/t/15-escape.t @@ -0,0 +1,38 @@ +use strict; + +use Test::More tests => 6; + +BEGIN { use_ok('PLP::Functions', 1.01) } + +# EscapeHTML + +is( + EscapeHTML(qq{\t<a test="'&'"/>\n}), + "\t&lt;a test=&quot;'&amp;'&quot;/&gt;\n", + 'EscapeHTML' +); + +is( + EscapeHTML(undef), + undef, + 'EscapeHTML undef' +); + +is( + eval { EscapeHTML('output', '') }, + undef, + 'EscapeHTML parameters' +); + +is( + eval { my $val = qq{ ><"\n}; EscapeHTML($val); $val }, + " &gt;&lt;&quot;\n", + 'EscapeHTML replace' +); + +is( + eval { EscapeHTML('output'); return 'no error' }, + undef, + 'EscapeHTML read-only modification' +); +