Spaces:
No application file
No application file
namespace Mautic\CoreBundle\Helper; | |
use Joomla\Filter\InputFilter; | |
class InputHelper | |
{ | |
/** | |
* String filter. | |
*/ | |
private static ?InputFilter $stringFilter = null; | |
/** | |
* HTML filter. | |
*/ | |
private static ?InputFilter $htmlFilter = null; | |
private static ?InputFilter $strictHtmlFilter = null; | |
/**
 * Interprets a loosely typed flag as a boolean.
 *
 * The single letters T/Y and F/N (in any case) map straight to true/false.
 * Anything else is delegated to filter_var() with FILTER_VALIDATE_BOOLEAN and
 * FILTER_NULL_ON_FAILURE, so a value that is not a recognised boolean
 * representation comes back as null rather than false.
 *
 * @param bool|int|string|null $value
 *
 * @return bool|null
 */
public static function boolean($value)
{
    $letter = strtoupper((string) $value);

    if ('T' === $letter || 'Y' === $letter) {
        return true;
    }

    if ('F' === $letter || 'N' === $letter) {
        return false;
    }

    return filter_var($value, FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
}
/**
 * Hands out one of three shared InputFilter instances, building all of them
 * on first use.
 *
 * @param bool $html   must be exactly true to get an HTML filter; any other value yields the plain string filter
 * @param bool $strict when $html is true, selects the strict (short tag list) filter instead of the blacklist one
 *
 * @return InputFilter
 */
private static function getFilter($html = false, $strict = false)
{
    if (empty(self::$htmlFilter)) {
        // Attributes refused by both HTML filters
        $blockedAttributes = [
            'codebase',
            'dynsrc',
            'lowsrc',
        ];

        // Most of Mautic's HTML uses include full HTML documents so use blacklist method
        self::$htmlFilter               = new InputFilter([], [], 1, 1);
        self::$htmlFilter->tagBlacklist = [
            'applet',
            'bgsound',
            'base',
            'basefont',
            'embed',
            'frame',
            'frameset',
            'ilayer',
            'layer',
            'object',
        ];
        self::$htmlFilter->attrBlacklist = $blockedAttributes;

        // Strict HTML - basic one-line formatting only
        $inlineTags                            = ['b', 'i', 'u', 'em', 'strong', 'a', 'span'];
        self::$strictHtmlFilter                = new InputFilter($inlineTags, [], 0, 1);
        self::$strictHtmlFilter->attrBlacklist = $blockedAttributes;

        // Standard behavior if HTML is not specifically used
        self::$stringFilter = new InputFilter();
    }

    if (true !== $html) {
        return self::$stringFilter;
    }

    if ($strict) {
        return self::$strictHtmlFilter;
    }

    return self::$htmlFilter;
}
/**
 * Catches static calls to methods this class does not define and forwards
 * them to the default string filter: the called method name becomes the
 * filter type and the first argument is the value to clean.
 *
 * @param string  $name
 * @param mixed[] $arguments
 *
 * @return mixed
 */
public static function __callStatic($name, $arguments)
{
    $filter = self::getFilter();

    return $filter->clean($arguments[0], $name);
}
/**
 * Generic entry point for cleaning a value or a (nested) array of values.
 *
 * For a scalar, $mask names the cleaning method of this class to apply; when
 * no such method exists the value goes through the default string filter with
 * $mask as the filter type. For an array, $mask may itself be an array keyed
 * by field name whose values are the method (or nested mask array) to use for
 * that field. Null values are returned untouched.
 *
 * @param mixed $value
 * @param mixed $mask
 * @param bool  $urldecode
 *
 * @return mixed
 */
public static function _($value, $mask = 'clean', $urldecode = false)
{
    if (!is_array($value)) {
        if (null === $value) {
            return $value;
        }

        if (is_string($mask) && method_exists(self::class, $mask)) {
            return self::$mask($value, $urldecode);
        }

        return self::getFilter()->clean($value, $mask);
    }

    foreach ($value as $key => $item) {
        // Fall back to the default filter unless a usable mask is found below
        $fieldMask = 'filter';

        if (!is_array($mask)) {
            if (method_exists(self::class, $mask)) {
                $fieldMask = $mask;
            }
        } elseif (!array_key_exists($key, $mask)) {
            if (is_array($item)) {
                // Likely a collection so use the same mask
                $fieldMask = $mask;
            }
        } elseif (is_array($mask[$key]) || method_exists(self::class, $mask[$key])) {
            $fieldMask = $mask[$key];
        }

        if (is_array($item)) {
            $value[$key] = self::_($item, $fieldMask, $urldecode);
        } elseif ('filter' === $fieldMask) {
            $value[$key] = self::getFilter()->clean($item, $fieldMask);
        } elseif (null !== $item) {
            $value[$key] = self::$fieldMask($item, $urldecode);
        }
    }

    return $value;
}
/**
 * HTML-escapes a value with FILTER_SANITIZE_SPECIAL_CHARS ('"<>& and
 * characters with ASCII value below 32). Arrays are processed element by
 * element, recursively, keeping their keys.
 *
 * @param bool|false $urldecode URL-decode each scalar before escaping it
 *
 * @return mixed|string
 */
public static function clean($value, $urldecode = false)
{
    if (is_array($value)) {
        return array_map(
            static fn ($item) => self::clean($item, $urldecode),
            $value
        );
    }

    $subject = $urldecode ? urldecode($value) : $value;

    return filter_var($subject, FILTER_SANITIZE_SPECIAL_CHARS);
}
/**
 * Removes tags (and NUL bytes) from a string, optionally URL-decoding it
 * first.
 */
public static function string(string $value, bool $urldecode = false): string
{
    $subject = $urldecode ? urldecode($value) : $value;

    return self::filter_string_polyfill($subject);
}
/**
 * Strips every character that is not an ASCII letter, a digit or one of the
 * explicitly allowed characters, then trims the result.
 *
 * @param string      $value             text to filter
 * @param bool        $urldecode         URL-decode $value before filtering
 * @param string|null $convertSpacesTo   when truthy, spaces are replaced with this string and its characters are kept
 * @param string[]    $allowedCharacters additional characters to keep
 */
public static function alphanum(string $value, bool $urldecode = false, ?string $convertSpacesTo = null, array $allowedCharacters = []): string
{
    if ($urldecode) {
        $value = urldecode($value);
    }

    if ($convertSpacesTo) {
        $value               = str_replace(' ', $convertSpacesTo, $value);
        $allowedCharacters[] = $convertSpacesTo;
    }

    // Passing the delimiter to preg_quote() escapes it inside the character
    // class as well, so an allowed "~" can no longer terminate the pattern
    // early. An empty list simply contributes nothing to the class.
    $delimiter = '~';
    $allowed   = preg_quote(implode('', $allowedCharacters), $delimiter);
    $regex     = $delimiter.'[^0-9a-z'.$allowed.']+'.$delimiter.'i';

    return trim((string) preg_replace($regex, '', $value));
}
/**
 * Builds a sanitized, lower-case string that is safe to use as a file name.
 *
 * Spaces become underscores, everything except a-z, 0-9, dot, underscore and
 * hyphen is dropped, and a leading ".." is removed. The extension, when
 * given, is appended after a dot.
 *
 * @param string $value
 * @param string $extension
 *
 * @return string
 */
public static function filename($value, $extension = null)
{
    $lowered   = strtolower(str_replace(' ', '_', $value));
    $sanitized = preg_replace("/[^a-z0-9\.\_-]/", '', $lowered);
    $sanitized = preg_replace("/^\.\./", '', $sanitized);

    return null === $extension ? $sanitized : $sanitized.'.'.$extension;
}
/**
 * Passes the value through without any cleaning; the only optional
 * transformation is URL-decoding.
 *
 * @param bool|false $urldecode
 *
 * @return string
 */
public static function raw($value, $urldecode = false)
{
    return $urldecode ? urldecode($value) : $value;
}
/**
 * Sanitizes a URL and rebuilds it from its parsed components.
 *
 * The value is run through FILTER_SANITIZE_URL; if it then fails to parse or
 * validate, the whole string is HTML-escaped via clean() and returned.
 * Otherwise the scheme is forced to an allowed protocol, the listed query
 * keys are removed and tags are stripped from user, password, path and
 * fragment before the URL is put back together.
 *
 * @param bool|false         $urldecode        URL-decode the value first
 * @param array<string>|null $allowedProtocols defaults to https, http and ftp
 * @param mixed              $defaultProtocol  used when the scheme is missing or not allowed; defaults to http
 * @param array<string>      $removeQuery      query string keys to drop
 * @param bool|false         $ignoreFragment   leave the #fragment out of the result
 *
 * @return mixed|string
 */
public static function url($value, $urldecode = false, $allowedProtocols = null, $defaultProtocol = null, $removeQuery = [], $ignoreFragment = false)
{
    if ($urldecode) {
        $value = urldecode($value);
    }

    if (empty($allowedProtocols)) {
        $allowedProtocols = ['https', 'http', 'ftp'];
    }

    if (empty($defaultProtocol)) {
        $defaultProtocol = 'http';
    }

    $value = filter_var($value, FILTER_SANITIZE_URL);
    $parts = parse_url($value);

    if (!$parts || !filter_var($value, FILTER_VALIDATE_URL)) {
        // This is a bad URL so just clean the whole thing and return it
        return self::clean($value);
    }

    $parts['scheme'] ??= $defaultProtocol;
    if (!in_array($parts['scheme'], $allowedProtocols, true)) {
        $parts['scheme'] = $defaultProtocol;
    }

    if (!empty($parts['query'])) {
        parse_str($parts['query'], $query);

        // remove specified keys from the query
        foreach ($removeQuery as $q) {
            unset($query[$q]);
        }

        // http_build_query urlencodes by default
        $parts['query'] = http_build_query($query);
    }

    // Build "user[:pass]@" as one unit. The ":" belongs to the password and
    // the "@" terminates the whole userinfo, so a user without a password
    // yields "user@host" and not "user:host".
    $userInfo = '';
    if (!empty($parts['user'])) {
        // strip tags that could be embedded in the username
        $userInfo = strip_tags($parts['user']);
    }
    if (!empty($parts['pass'])) {
        // strip tags that could be embedded in the password
        $userInfo .= ':'.strip_tags($parts['pass']);
    }
    if ('' !== $userInfo) {
        $userInfo .= '@';
    }

    return
        // already clean due to the allowed protocol list above
        (!empty($parts['scheme']) ? $parts['scheme'].'://' : '').
        $userInfo.
        // should be caught by FILTER_VALIDATE_URL if the host has invalid characters
        (!empty($parts['host']) ? $parts['host'] : '').
        // type cast to int
        (!empty($parts['port']) ? ':'.(int) $parts['port'] : '').
        // strip tags that could be embedded in a path
        (!empty($parts['path']) ? strip_tags($parts['path']) : '').
        // cleaned through the parse_str (urldecode) and http_build_query (urlencode) above
        (!empty($parts['query']) ? '?'.$parts['query'] : '').
        // strip tags that could be embedded in the fragment
        (!$ignoreFragment && !empty($parts['fragment']) ? '#'.strip_tags($parts['fragment']) : '');
}
/**
 * Reduces a value to the characters permitted by FILTER_SANITIZE_EMAIL.
 *
 * The input is cut to its first 254 bytes before sanitizing and the result
 * is trimmed.
 *
 * @param bool|false $urldecode URL-decode the value first
 */
public static function email($value, $urldecode = false): string
{
    $candidate = $urldecode ? urldecode($value) : $value;
    $candidate = filter_var(substr($candidate, 0, 254), FILTER_SANITIZE_EMAIL);

    return trim($candidate);
}
/**
 * Cleans a value with clean() and guarantees an array result: empty values
 * become an empty array and a scalar is wrapped in a one-element array.
 *
 * @param bool|false $urldecode
 *
 * @return array|mixed|string
 */
public static function cleanArray($value, $urldecode = false)
{
    $cleaned = self::clean($value, $urldecode);

    // Return empty array for empty values
    if (empty($cleaned)) {
        return [];
    }

    // Put a value into array if not an array
    return is_array($cleaned) ? $cleaned : [$cleaned];
}
/**
 * Returns clean HTML.
 *
 * Arrays are cleaned element by element. For a string, constructs that must
 * survive filtering (doctype, CDATA sections, conditional comments, Outlook
 * o:/w:/v: tags, attribute-less script blocks and HTML comments) are swapped
 * for placeholder tags, the value is passed through the blacklist HTML
 * filter, and the placeholders are turned back into the original markup.
 *
 * @param string[]|string $value
 *
 * @return mixed|string
 */
public static function html($value)
{
    if (is_array($value)) {
        foreach ($value as &$val) {
            $val = self::html($val);
        }
        unset($val);

        return $value;
    }

    // Special handling for doctype
    $doctypeFound = preg_match('/(<!DOCTYPE(.*?)>)/is', (string) $value, $doctype);

    // Special handling for CDATA tags
    $value = str_replace(['<![CDATA[', ']]>'], ['<mcdata>', '</mcdata>'], (string) $value, $cdataCount);

    // Special handling for conditional blocks
    preg_match_all("/<!--\[if(.*?)\]>(.*?)(?:\<\!\-\-)?<!\[endif\]-->/is", $value, $matches);
    if (!empty($matches[0])) {
        $from = [];
        $to   = [];
        foreach ($matches[0] as $key => $match) {
            $from[]   = $match;
            $startTag = '<mcondition>';
            $endTag   = '</mcondition>';
            // Blocks closed with "<!--<![endif]-->" get their own placeholder so the closing form can be restored
            if (str_contains($match, '<!--<![endif]-->')) {
                $startTag = '<mconditionnonoutlook>';
                $endTag   = '</mconditionnonoutlook>';
            }
            $to[] = $startTag.'<mif>'.$matches[1][$key].'</mif>'.$matches[2][$key].$endTag;
        }
        $value = str_replace($from, $to, $value);
    }

    // Special handling for XML tags used in Outlook optimized emails <o:*/> and <w:/>
    $value = preg_replace_callback(
        "/<\/*[o|w|v]:[^>]*>/is",
        fn ($matches): string => '<mencoded>'.htmlspecialchars($matches[0]).'</mencoded>',
        $value, -1, $needsDecoding);

    // Special handling for script tags
    $value = preg_replace_callback(
        "/<script>(.*?)<\/script>/is",
        fn ($matches): string => '<mscript>'.base64_encode($matches[0]).'</mscript>',
        $value, -1, $needsScriptDecoding);

    // Special handling for HTML comments
    $value = str_replace(['<!-->', '<!--', '-->'], ['<mcomment></mcomment>', '<mcomment>', '</mcomment>'], $value, $commentCount);

    // The value counts as "unicode" when converting it to Windows-1252
    // changes its byte length or is not possible at all.
    try {
        $converted  = iconv('UTF-8', 'Windows-1252', $value);
        $hasUnicode = false === $converted || strlen($value) !== strlen($converted);
    } catch (\ErrorException) {
        // Reached when an error handler turns iconv()'s notice into an exception
        $hasUnicode = 'UTF-8' === mb_detect_encoding($value);
    }

    // NOTE(review): for unicode content the filter type is 'raw', which presumably
    // skips the tag/attribute blacklist entirely - confirm this is intended.
    $value = self::getFilter(true)->clean($value, $hasUnicode ? 'raw' : 'html');

    // After cleaning, decode percent-encoded sequences in unicode content
    $value = $hasUnicode ? rawurldecode($value) : $value;

    // Was a doctype found?
    if ($doctypeFound && false === $hasUnicode) {
        $value = "$doctype[0]$value";
    }

    if ($cdataCount) {
        $value = str_replace(['<mcdata>', '</mcdata>'], ['<![CDATA[', ']]>'], $value);
    }

    if (!empty($matches[0])) {
        // Special handling for conditional blocks
        $value = preg_replace("/<mconditionnonoutlook><mif>(.*?)<\/mif>(.*?)<\/mconditionnonoutlook>/is", '<!--[if$1]>$2<!--<![endif]-->', $value);
        $value = preg_replace("/<mcondition><mif>(.*?)<\/mif>(.*?)<\/mcondition>/is", '<!--[if$1]>$2<![endif]-->', $value);
    }

    if ($commentCount) {
        $value = str_replace(['<mcomment>', '</mcomment>'], ['<!--', '-->'], $value);
    }

    if ($needsDecoding) {
        $value = preg_replace_callback(
            "/<mencoded>(.*?)<\/mencoded>/is",
            fn ($matches): string => htmlspecialchars_decode($matches[1]),
            $value);
    }

    if ($needsScriptDecoding) {
        $value = preg_replace_callback(
            "/<mscript>(.*?)<\/mscript>/is",
            fn ($matches): string => base64_decode($matches[1]),
            $value);
    }

    return $value;
}
/**
 * Cleans a value with the strict HTML filter, which allows only the tags
 * 'b', 'i', 'u', 'em', 'strong', 'a' and 'span'.
 *
 * Arrays are cleaned element by element (recursively) and returned as an
 * array; the scalar filter call is only made for non-array values.
 *
 * @param mixed $value a string or a (nested) array of strings
 *
 * @return mixed|string
 */
public static function strict_html($value)
{
    if (is_array($value)) {
        foreach ($value as &$val) {
            $val = self::strict_html($val);
        }
        unset($val);

        // Return here: the elements are already cleaned, and the array as a
        // whole must not be pushed through the scalar filter call below.
        return $value;
    }

    return self::getFilter(true, true)->clean($value, 'html');
}
/**
 * Transliterates a value using the "Any-Latin; Latin-ASCII" rule set.
 *
 * The intl Transliterator is preferred; when the extension is missing or the
 * transliterator cannot be created, URLify is used instead.
 *
 * @return mixed
 */
public static function transliterate($value)
{
    if (function_exists('transliterator_transliterate')) {
        $converter = \Transliterator::create('Any-Latin; Latin-ASCII');

        if ($converter) {
            // Use intl by default
            return $converter->transliterate($value);
        }
    }

    return \URLify::transliterate((string) $value);
}
/**
 * Transliterates the name part of a file name and reduces it to letters,
 * digits and hyphens (spaces become hyphens). The extension, if there is
 * one, is re-attached unchanged.
 */
public static function transliterateFilename(string $filename): string
{
    $parts    = pathinfo($filename);
    $baseName = self::alphanum(self::transliterate($parts['filename']), false, '-');

    return isset($parts['extension']) ? $baseName.'.'.$parts['extension'] : $baseName;
}
/**
 * Minifies an HTML string: normalises whitespace inside opening tags,
 * minifies inline style attributes and removes insignificant whitespace
 * between tags and text. Blank input is returned unchanged.
 */
public static function minifyHTML(string $html): string
{
    if ('' === trim($html)) {
        return $html;
    }

    // Remove extra white-space(s) between HTML attribute(s)
    $html = preg_replace_callback(
        '#<([^\/\s<>!]+)(?:\s+([^<>]*?)\s*|\s*)(\/?)>#s',
        static function (array $tag): string {
            $attributes = preg_replace(
                '#([^\s=]+)(\=([\'"]?)(.*?)\3)?(\s+|$)#s',
                ' $1$2',
                $tag[2]
            );

            return '<'.$tag[1].$attributes.$tag[3].'>';
        },
        str_replace("\r", '', $html)
    );

    // Minify inline CSS declaration(s)
    if (str_contains($html, ' style=')) {
        $html = preg_replace_callback(
            '#<([^<]+?)\s+style=([\'"])(.*?)\2(?=[\/\s>])#s',
            static function (array $tag): string {
                return '<'.$tag[1].' style='.$tag[2].self::minifyCss($tag[3]).$tag[2];
            },
            $html
        );
    }

    // Ordered [pattern, replacement] pairs; preg_replace() applies them one after another.
    // t = text, o = tag open, c = tag close
    $rules = [
        // Keep important white-space(s) after self-closing HTML tag(s)
        ['#<(img|input)(>| .*?>)#s', '<$1$2</$1>'],
        // Remove a line break and two or more white-space(s) between tag(s)
        ['#(<!--.*?-->)|(>)(?:\n*|\s{2,})(<)|^\s*|\s*$#s', '$1$2$3'],
        // t+c || o+t
        ['#(<!--.*?-->)|(?<!\>)\s+(<\/.*?>)|(<[^\/]*?>)\s+(?!\<)#s', '$1$2$3'],
        // o+o || c+c
        ['#(<!--.*?-->)|(<[^\/]*?>)\s+(<[^\/]*?>)|(<\/.*?>)\s+(<\/.*?>)#s', '$1$2$3$4$5'],
        // c+t || t+o || o+t -- separated by long white-space(s)
        ['#(<!--.*?-->)|(<\/.*?>)\s+(\s)(?!\<)|(?<!\>)\s+(\s)(<[^\/]*?\/?>)|(<[^\/]*?\/?>)\s+(\s)(?!\<)#s', '$1$2$3$4$5$6$7'],
        // empty tag
        ['#(<!--.*?-->)|(<[^\/]*?>)\s+(<\/.*?>)#s', '$1$2$3'],
        // reset previous fix (drops the closing tag added by the first rule)
        ['#<(img|input)(>| .*?>)<\/\1>#s', '<$1$2'],
        // clean up ...
        // NOTE(review): if the blanks in the next two patterns are ordinary spaces, both rules
        // replace their match with identical text - confirm the intended character upstream.
        ['#( ) (?![<\s])#', '$1 '],
        // --ibid
        ['#(?<=\>)( )(?=\<)#', '$1'],
    ];

    $html = preg_replace(array_column($rules, 0), array_column($rules, 1), $html);

    return str_replace(["\r", "\n"], ' ', $html);
}
/**
 * Minifies a CSS fragment (used for inline style attributes).
 *
 * Collapses whitespace, removes comments and repeated semicolons, and drops
 * the unit from zero lengths. Only values that are exactly zero ("0px",
 * "0.0em") lose their unit; fractional values such as "0.5em" are left as
 * they are.
 */
private static function minifyCss(string $css): string
{
    // Collapse whitespace runs, then drop the whitespace around : ; { }
    $css = preg_replace('/\s*([:;{}])\s*/', '$1', preg_replace('/\s+/', ' ', $css));

    // Remove comments
    $css = preg_replace('/\/\*[^*]*\*+([^\/*][^*]*\*+)*\//', '', $css);

    // Removing a comment can leave two blanks next to each other; collapse again and trim the ends
    $css = trim(preg_replace('/\s+/', ' ', $css));

    // Replace multiple semicolons with one
    $css = preg_replace('/;(?=;)/', '', $css);

    // Replace 0(px,em,%, etc) with 0. The optional fraction may consist of
    // zeros only, otherwise e.g. "0.5em" would be rewritten to "0".
    $css = preg_replace('/(:| )0(?:\.0+)?(?:%|em|ex|px|in|cm|mm|pt|pc)/i', '${1}0', $css);

    return $css;
}
/**
 * Stand-in for the deprecated FILTER_SANITIZE_STRING filter (PHP 8.1+):
 * deletes NUL bytes and anything that looks like a tag, including an
 * unterminated "<..." that runs to the end of the string.
 *
 * @see https://stackoverflow.com/questions/69207368/constant-filter-sanitize-string-is-deprecated
 */
private static function filter_string_polyfill(string $string): string
{
    $nulByteOrTag = '/\x00|<[^>]*>?/';

    return preg_replace($nulByteOrTag, '', $string);
}
} | |