diff --git a/Escape.php b/Escape.php new file mode 100644 index 0000000..346e9d5 --- /dev/null +++ b/Escape.php @@ -0,0 +1,218 @@ += 256 + || ($ord >= 48 && $ord <= 57) + || ($ord >= 65 && $ord <= 90) + || ($ord >= 97 && $ord <= 122) + ) { + $encoded_data .= $char; + } else { + $hex = mb_strtoupper(dechex($ord)); + if ($format === 'html') { + $encoded_data .= '&#x' . $hex . ';'; + } else if ($format === 'unicode') { + $encoded_data .= '\\u' . $ord; + } else if ($format === 'css') { + $hex = str_pad($hex, 6, '0', \STR_PAD_LEFT); + $encoded_data .= '\\' . $hex; + } else if ($format === 'url') { + $encoded_data .= '%' . $hex; + } + } + } + + return $encoded_data; + }

    /**
     * Context: Text in HTML body
     * e.g. <span>UNTRUSTED DATA</span>
     *
     * Encode entities:
     *   & to &amp;
     *   < to &lt;
     *   > to &gt;
     *   " to &quot;
     *   ' to &#x27;
     *
     * @param mixed $untrusted_data Value to escape; anything that is not
     *                              already a string is converted with strval().
     *
     * @return string The input with the five characters above replaced by
     *                their entity references; all other characters unchanged.
     */
    public static function htmlBody(mixed $untrusted_data): string
    {
        if (!is_string($untrusted_data)) {
            $untrusted_data = strval($untrusted_data);
        }

        // strtr() with a map makes a single left-to-right pass and never
        // rescans text it has already substituted, so the '&' introduced by
        // one entity reference is not escaped a second time.
        return strtr($untrusted_data, [
            '&' => '&amp;',
            '<' => '&lt;',
            '>' => '&gt;',
            '"' => '&quot;',
            "'" => '&#x27;',
        ]);
    }

    /**
     * Context: HTML in HTML body
     * e.g.
<div>UNTRUSTED HTML</div>
+ * + * HTML Validation (i.e. PHP HTML Purifier) + */
    /*public static function validate_html($untrusted_html)
    {

    }*/

    /**
     * Context: Safe HTML attributes
     * e.g. <input type="text" name="fname" value="UNTRUSTED DATA">
     *
     * Limit to whitelisted attributes:
     * align, alink, alt, bgcolor, border, cellpadding, cellspacing, class,
     * color, cols, colspan, coords, dir, face, height, hspace, ismap, lang,
     * marginheight, marginwidth, multiple, nohref, noresize, noshade,
     * nowrap, ref, rel, rev, rows, rowspan, scrolling, shape, span, summary,
     * tabindex, title, usemap, valign, value, vlink, vspace, width.
     *
     * Except for alphanumeric characters, escape all characters with the
     * &#xHH; HTML entity format, including spaces
     *
     * Apply additional validation to href and src attributes
     */
    public const HTML_ATTR_WHITELIST = [
        'align', 'alink', 'alt', 'bgcolor', 'border', 'cellpadding',
        'cellspacing', 'class', 'color', 'cols', 'colspan', 'coords', 'dir',
        'face', 'height', 'hspace', 'ismap', 'lang', 'marginheight',
        'marginwidth', 'multiple', 'nohref', 'noresize', 'noshade', 'nowrap',
        'ref', 'rel', 'rev', 'rows', 'rowspan', 'scrolling', 'shape', 'span',
        'summary', 'tabindex', 'title', 'usemap', 'valign', 'value', 'vlink',
        'vspace', 'width',

        'href', 'src',
    ];

    /**
     * Encode untrusted data for use as the value of a whitelisted HTML
     * attribute.
     *
     * The attribute name is lower-cased before it is checked against
     * HTML_ATTR_WHITELIST. For `href` and `src` the value is additionally
     * passed to validateUrl() before being encoded.
     *
     * @param string $attr           Attribute name (matched case-insensitively).
     * @param mixed  $untrusted_data Value to encode with encode(..., 'html').
     * @param bool   $wrap           When true, return ` name="value"` (with a
     *                               leading space); when false, return only
     *                               the encoded value.
     *
     * @return string The rendered attribute or the bare encoded value.
     *
     * @throws \InvalidArgumentException If $attr is not in HTML_ATTR_WHITELIST.
     */
    public static function htmlAttr(
        string $attr,
        mixed $untrusted_data,
        bool $wrap = true
    ): string {
        $attr = mb_strtolower($attr);
        if (!in_array($attr, static::HTML_ATTR_WHITELIST, true)) {
            throw new \InvalidArgumentException('HTML attribute is not whitelisted');
        }

        if ($attr === 'href' || $attr === 'src') {
            // NOTE(review): the return value of validateUrl() is not used;
            // the raw input is what gets encoded below. Presumably
            // validateUrl() rejects bad URLs by throwing -- confirm. If it
            // instead returns a sanitized URL or a boolean, that result must
            // be acted on here.
            static::validateUrl($untrusted_data);
        }

        $encoded_data = static::encode($untrusted_data, 'html');

        return $wrap ? ' ' . $attr . '="' . $encoded_data . '"' : $encoded_data;
    }

    /**
     * Context: Untrusted URL in a `src` or `href` attribute
     * e.g.