+ *
+ * HTML Validation (i.e. PHP HTML Purifier)
+ */
+ /*public static function validate_html($untrusted_html)
+ {
+
+ }*/
+
+ /**
+ * Context: Safe HTML attributes
+ * e.g. <div attr="UNTRUSTED DATA">
+ *
+ * Limit to whitelisted attributes:
+ * align, alink, alt, bgcolor, border, cellpadding, cellspacing, class,
+ * color, cols, colspan, coords, dir, face, height, hspace, ismap, lang,
+ * marginheight, marginwidth, multiple, nohref, noresize, noshade,
+ * nowrap, ref, rel, rev, rows, rowspan, scrolling, shape, span, summary,
+ * tabindex, title, usemap, valign, value, vlink, vspace, width.
+ *
+ * Except for alphanumeric characters, escape all characters with the
+ * &#xHH; HTML entity format, including spaces
+ *
+ * Apply additional validation to href and src attributes
+ */
+ public const HTML_ATTR_WHITELIST = [
+     // Plain attributes, in alphabetical order (one initial letter per row).
+     'align', 'alink', 'alt',
+     'bgcolor', 'border',
+     'cellpadding', 'cellspacing', 'class', 'color', 'cols', 'colspan', 'coords',
+     'dir',
+     'face',
+     'height', 'hspace',
+     'ismap',
+     'lang',
+     'marginheight', 'marginwidth', 'multiple',
+     'nohref', 'noresize', 'noshade', 'nowrap',
+     'ref', 'rel', 'rev', 'rows', 'rowspan',
+     'scrolling', 'shape', 'span', 'summary',
+     'tabindex', 'title',
+     'usemap',
+     'valign', 'value', 'vlink', 'vspace',
+     'width',
+
+     // URL-carrying attributes; htmlAttr() also runs validateUrl() on these.
+     'href', 'src',
+ ];
+ /**
+ * Encode untrusted data for use as the value of a whitelisted HTML attribute.
+ *
+ * The attribute name is lower-cased and must appear in HTML_ATTR_WHITELIST.
+ * For `href` and `src` the value is first passed to validateUrl(), which
+ * throws when it rejects the URL.
+ *
+ * @param string $attr Attribute name (matched case-insensitively)
+ * @param mixed $untrusted_data Value to encode
+ * @param bool $wrap When true, return ` attr="value"` (with the leading
+ * space); when false, return only the encoded value
+ *
+ * @throws \InvalidArgumentException If the attribute is not whitelisted, or
+ * validateUrl() rejects an href/src value
+ */
+ public static function htmlAttr(
+     string $attr,
+     mixed $untrusted_data,
+     bool $wrap = true
+ ): string {
+     $attr = mb_strtolower($attr);
+     if (!in_array($attr, static::HTML_ATTR_WHITELIST, true)) {
+         throw new \InvalidArgumentException('HTML attribute is not whitelisted');
+     }
+     if ($attr === 'href' || $attr === 'src') {
+         // Called for its side effect only: it throws on a rejected URL,
+         // so the boolean result does not need to be kept.
+         static::validateUrl($untrusted_data);
+     }
+
+     // encode() is defined elsewhere in this class; 'html' selects its
+     // HTML encoding mode.
+     $encoded_data = static::encode($untrusted_data, 'html');
+     return $wrap ? ' ' . $attr . '="' . $encoded_data . '"' : $encoded_data;
+ }
+
+ /**
+ * Context: Untrusted URL in a `src` or `href` attribute
+ * e.g. <iframe src="UNTRUSTED URL">
+ * e.g. <a href="UNTRUSTED URL">link</a>
+ *
+ * Whitelist https URLs only
+ *
+ * Apply additional whitelisting, canonicalization and anti-virus checks
+ * depending on the use-case
+ */
+ public static function validateUrl(mixed $untrusted_data): bool
+ {
+     // Work on a string copy; anything that is not already a string goes
+     // through strval().
+     $url = is_string($untrusted_data) ? $untrusted_data : strval($untrusted_data);
+
+     // Crude scheme check: the first eight characters must be exactly
+     // 'https://'. Nothing after the scheme is inspected.
+     if (mb_substr($url, 0, 8) === 'https://') {
+         return true;
+     }
+
+     throw new \InvalidArgumentException('URL is not HTTPS');
+ }
+
+ /**
+ * Context: JavaScript variable
+ * e.g. <script>var x = 'UNTRUSTED DATA';</script>
+ * e.g. <div onmouseover="x = 'UNTRUSTED DATA'">
+ *
+ * Do not use this when outputting JSON in HTML. Instead, use the dedicated
+ * jsonInHtml method
+ *
+ * Ensure JavaScript variables are quoted
+ *
+ * Except for alphanumeric characters, escape all characters with the
+ * \uXXXX unicode escaping format
+ *
+ * Avoid backslash encoding
+ */
+ public static function jsVar(mixed $untrusted_data): string
+ {
+     // Hand off to encode() (defined elsewhere in this class) in 'unicode' mode.
+     return static::encode($untrusted_data, 'unicode');
+ }
+
+ /**
+ * Context: CSS value
+ * e.g. <div style="width: UNTRUSTED DATA;">
+ *
+ * CSS escaping supports \XX and \XXXXXX. Zero-pad to 6 characters
+ */
+ public static function cssValue(mixed $untrusted_data): string
+ {
+     // Hand off to encode() (defined elsewhere in this class) in 'css' mode.
+     return static::encode($untrusted_data, 'css');
+ }
+
+ /**
+ * Context: URL parameter
+ * e.g. <a href="https://example.com/?param=UNTRUSTED DATA">link</a>
+ *
+ * Except for alphanumeric characters, escape all characters with the
+ * %HH escaping format
+ */
+ public static function urlParam(mixed $untrusted_data): string
+ {
+     // Hand off to encode() (defined elsewhere in this class) in 'url' mode.
+     return static::encode($untrusted_data, 'url');
+ }
+
+ /**
+ * Context: JSON in HTML
+ * e.g. <div id="data" style="display:none">ENCODED JSON</div>
+ * e.g. <script>var data = JSON.parse(document.getElementById('data').textContent);</script>
+ *
+ * Encode entities: & < > " '
+ *
+ * Output JSON inside a hidden element before calling JSON.parse(el.textContent)
+ *
+ * @param mixed $untrusted_data
+ */
+ public static function jsonInHtml(mixed $untrusted_data): string
+ {
+     // Hex-escape & < > " ' so the JSON text is inert when embedded in HTML.
+     // JSON_THROW_ON_ERROR makes json_encode() raise \JsonException on
+     // failure instead of returning false.
+     $flags = \JSON_HEX_AMP | \JSON_HEX_TAG | \JSON_HEX_QUOT | \JSON_HEX_APOS | \JSON_THROW_ON_ERROR;
+
+     // Return the encoder output unchanged. A truthiness fallback such as
+     // `?: '[]'` would replace the valid JSON document "0" (the encoding of
+     // integer 0) with '[]', because the string '0' is falsy in PHP.
+     return json_encode($untrusted_data, $flags);
+ }
+}
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..0a04128
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9900874
--- /dev/null
+++ b/README.md
@@ -0,0 +1,150 @@
+# \thisispiers\Xss\Escape
+
+A PHP implementation of [OWASP's Cross Site Scripting Prevention Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html)
+
+Released under LGPL v3.0. Requires PHP >= 8.0 (the `mixed` type declaration is used) and the mbstring extension
+
+Install with Composer `composer require thisispiers/xss-escape`
+
+## Usage
+
+Untrusted data should be encoded differently depending on context. This library provides a static method for each context.
+
+### Text in HTML Body
+
+i.e. `UNTRUSTED DATA`
+
+```
+htmlBody(mixed $untrusted_data): string
+```
+
+`$untrusted_data` is cast to string
+
+### HTML in HTML body
+
+i.e. `<div>UNTRUSTED HTML</div>`
+
+Use a full HTML validator in this context, such as [HTML Purifier](https://github.com/ezyang/htmlpurifier) or [DOMPurify](https://github.com/cure53/DOMPurify)
+
+### Safe HTML attributes
+
+i.e. `<div attr="UNTRUSTED DATA">`
+
+```
+htmlAttr(string $attr, mixed $untrusted_data, bool $wrap = true): string
+```
+
+`$attr` must be one of
+- align
+- alink
+- alt
+- bgcolor
+- border
+- cellpadding
+- cellspacing
+- class
+- color
+- cols
+- colspan
+- coords
+- dir
+- face
+- height
+- href (see [URLs](#URLs))
+- hspace
+- ismap
+- lang
+- marginheight
+- marginwidth
+- multiple
+- nohref
+- noresize
+- noshade
+- nowrap
+- ref
+- rel
+- rev
+- rows
+- rowspan
+- scrolling
+- shape
+- span
+- src (see [URLs](#URLs))
+- summary
+- tabindex
+- title
+- usemap
+- valign
+- value
+- vlink
+- vspace
+- width
+
+`$untrusted_data` is cast to string
+
+If `$wrap` is `true`, the returned string is prefixed by a space, the attribute name, an equal sign and wrapped in double quote marks i.e. `` value="ENCODED DATA"``.
+
+### URLs
+
+URLs in `src` or `href` HTML attributes i.e. `<iframe src="UNTRUSTED URL">` or `<a href="UNTRUSTED URL">link</a>`
+
+```
+validateUrl(mixed $untrusted_data): bool
+```
+
+`$untrusted_data` is cast to string
+
+Untrusted URLs are currently only checked to be HTTPS. This is a crude check to avoid becoming a full URL parsing library. It is highly recommended that you run more sophisticated validation on your untrusted URLs, such as rejecting URLs by hostname.
+
+### JavaScript variables
+
+i.e. `<script>var x = 'UNTRUSTED DATA';</script>` or `<div onmouseover="x = 'UNTRUSTED DATA'">`
+
+```
+jsVar(mixed $untrusted_data): string
+```
+
+`$untrusted_data` is cast to string
+
+### CSS values
+
+i.e. `<div style="width: UNTRUSTED DATA;">`
+
+```
+cssValue(mixed $untrusted_data): string
+```
+
+`$untrusted_data` is cast to string
+
+### URL parameters
+
+i.e. `<a href="https://example.com/?param=UNTRUSTED DATA">link</a>`
+
+```
+urlParam(mixed $untrusted_data): string
+```
+
+`$untrusted_data` is cast to string
+
+### JSON in HTML
+
+```
+jsonInHtml(mixed $untrusted_data): string
+```
+
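+`$untrusted_data` is JSON-encoded (it is not cast to string), with `&`, `<`, `>`, `"` and `'` hex-escaped. A `JsonException` is thrown if encoding fails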
+
+Output JSON inside a hidden element before calling `JSON.parse` e.g.
+```
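+<div id="data" style="display:none"><?= \thisispiers\Xss\Escape::jsonInHtml($untrusted_data) ?></div>
+<script>var data = JSON.parse(document.getElementById('data').textContent);</script>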
+```
+
+## Contributing & Help
+
+Don't expect frequent updates, but pull requests for security and performance improvements are welcome!
+
+There is no guarantee this library complies with the latest OWASP cheat sheet recommendations. Create an issue if you think it's out of date, or start a pull request.
+
+To save keystrokes, you might want to create an alias for this class
+e.g. `class_alias('\\thisispiers\Xss\\Escape', '\\esc');`
\ No newline at end of file
diff --git a/composer.json b/composer.json
new file mode 100644
index 0000000..3d37014
--- /dev/null
+++ b/composer.json
@@ -0,0 +1,14 @@
+{
+ "name": "thisispiers/xss-escape",
+ "description": "A PHP implementation of OWASP Cross Site Scripting Prevention Cheat Sheet",
+ "license": "LGPL-3.0-only",
+ "require": {
+ "php": ">=8.0",
+ "ext-mbstring": "*"
+ },
+ "autoload": {
+ "psr-4": {
+ "thisispiers\\Xss\\": ""
+ }
+ }
+}
\ No newline at end of file