* @package Alphanormalize * * * \mainpage alphanormalize_php * Simple functions to remove "accents" and replace non-alphanumeric characters. * See alphanormalize.inc file. * *
* For example: 'Élément' => 'Element'.
*
* If $encoding === null
* then use the internal character encoding.
*
* @param string $s
* @param null|string $encoding
*
* @return string
*/
function mb_str_accentalpha_to_alpha($s, $encoding=null) {
    // Returns a copy of $s in which every character found as a key of
    // $ACCENTALPHA_TO_ALPHA is replaced by the associated value; every
    // other character is copied unchanged.
    //
    // $s        string to convert
    // $encoding character encoding of $s, or null to use the internal
    //           character encoding (mb_internal_encoding())

    #DEBUG
    // Real expressions (not strings): since PHP 8.0 a string passed to
    // assert() is no longer evaluated, so assert('...') always succeeds.
    assert(is_string($s));
    assert(($encoding === null) || is_string($encoding));
    #DEBUG_END

    // The included file is expected to define $ACCENTALPHA_TO_ALPHA in this
    // function's local scope (hence require, not require_once).
    require 'accentalpha_to_alpha.inc';

    // Resolve the default once so that a single loop serves both cases.
    if ($encoding === null) {
        $encoding = mb_internal_encoding();
    }

    $a = array();  // converted characters, in input order
    $len = mb_strlen($s, $encoding);
    for ($i = 0; $i < $len; $i++) {
        $c = mb_substr($s, $i, 1, $encoding);
        $a[] = (array_key_exists($c, $ACCENTALPHA_TO_ALPHA)
                ? $ACCENTALPHA_TO_ALPHA[$c]
                : $c);
    }

    return implode('', $a);
}
/**
 * \brief
 * Copy of $s without "accents",
 * with the characters of the Greek alphabet replaced
 * and with all other non-alphanumeric characters replaced by $replacement.
 *
 * Returns a copy of $s in which:
 * - each ASCII digit or letter (0-9, A-Z, a-z) is kept unchanged;
 * - each "accented" character is converted by removing its "accent"
 *   (the converted characters are those of the associative table $ACCENTALPHA_TO_ALPHA from accentalpha_to_alpha.inc file);
 * - each character of the Greek alphabet is converted to alphabetic characters
 *   (the converted characters are those of the associative table $GREEK_TO_ALPHA from greek_to_alpha.inc file);
 * - each group of consecutive other characters is replaced by a single $replacement.
 *
 * For example: 'Élément ; α and ω.' => 'Element_a_and_o_'.
 *
 * Adopts the standard ONU/ELOT: see http://www.opimedia.be/DS/mementos/grecs.htm .
 *
 * If $strip
 * then HTML tags are deleted first.
 *
 * If $entity_decode
 * then HTML entities are converted to normal characters (after the optional tag deletion),
 * using the same character encoding as the rest of the conversion.
 * (Before PHP 5.4, not all HTML entities are supported!)
 * Note that html_entity_decode() only supports a limited list of character sets.
 *
 * If $encoding === null
 * then use the internal character encoding.
 *
 * @param string $s
 * @param bool $strip
 * @param bool $entity_decode
 * @param string $replacement
 * @param null|string $encoding
 *
 * @return string
 */
function mb_str_alphanormalize($s, $strip=false, $entity_decode=false, $replacement='_', $encoding=null) {
    #DEBUG
    // Real expressions (not strings): since PHP 8.0 a string passed to
    // assert() is no longer evaluated, so assert('...') always succeeds.
    assert(is_string($s));
    assert(is_bool($strip));
    assert(is_bool($entity_decode));
    assert(is_string($replacement));
    assert(($encoding === null) || is_string($encoding));
    #DEBUG_END

    // The included files are expected to define $ACCENTALPHA_TO_ALPHA and
    // $GREEK_TO_ALPHA in this function's local scope.
    require 'accentalpha_to_alpha.inc';
    require 'greek_to_alpha.inc';

    // Resolve the default once so that entity decoding and the character
    // loop always agree on the encoding.
    if ($encoding === null) {
        $encoding = mb_internal_encoding();
    }

    if ($strip) {  // delete HTML tags
        $s = strip_tags($s);
    }

    if ($entity_decode) {  // convert HTML entities to normal characters
        // Decode into $encoding (not unconditionally the internal encoding),
        // otherwise the decoded bytes would be misread by the loop below.
        $s = html_entity_decode($s, ENT_COMPAT, $encoding);
    }

    $not_consecutive = true;  // false while the last emitted item is a $replacement
    $a = array();             // converted characters, in input order

    $len = mb_strlen($s, $encoding);
    for ($i = 0; $i < $len; $i++) {
        $c = mb_substr($s, $i, 1, $encoding);

        if ((('0' <= $c) && ($c <= '9'))
            || (('A' <= $c) && ($c <= 'Z'))
            || (('a' <= $c) && ($c <= 'z'))) {  // alphanumeric character
            $a[] = $c;
            $not_consecutive = true;
        }
        elseif (array_key_exists($c, $ACCENTALPHA_TO_ALPHA)) {  // "accented" character
            $a[] = $ACCENTALPHA_TO_ALPHA[$c];
            $not_consecutive = true;
        }
        elseif (array_key_exists($c, $GREEK_TO_ALPHA)) {  // Greek letter
            $a[] = $GREEK_TO_ALPHA[$c];
            $not_consecutive = true;
        }
        elseif ($not_consecutive) {  // first character of a group of other characters
            $a[] = $replacement;
            $not_consecutive = false;
        }
        // else: inside a group already replaced, nothing to emit
    }

    return implode('', $a);
}
/**
 * \brief Copy of $s with the characters of the Greek alphabet replaced.
 *
 * Returns a copy of $s in which the characters of the Greek alphabet are converted to alphabetic characters
 * (the converted characters are those of the associative table $GREEK_TO_ALPHA from greek_to_alpha.inc file).
 * All other characters are copied unchanged.
 *
 * Adopts the standard ONU/ELOT: see http://www.opimedia.be/DS/mementos/grecs.htm .
 *
 * For example: 'α and ω' => 'a and o'.
 *
 * If $encoding === null
 * then use the internal character encoding.
 *
 * @param string $s
 * @param null|string $encoding
 *
 * @return string
 */
function mb_str_greek_to_alpha($s, $encoding=null) {
    #DEBUG
    // Real expressions (not strings): since PHP 8.0 a string passed to
    // assert() is no longer evaluated, so assert('...') always succeeds.
    assert(is_string($s));
    assert(($encoding === null) || is_string($encoding));
    #DEBUG_END

    // The included file is expected to define $GREEK_TO_ALPHA in this
    // function's local scope (hence require, not require_once).
    require 'greek_to_alpha.inc';

    // Resolve the default once so that a single loop serves both cases.
    if ($encoding === null) {
        $encoding = mb_internal_encoding();
    }

    $a = array();  // converted characters, in input order
    $len = mb_strlen($s, $encoding);
    for ($i = 0; $i < $len; $i++) {
        $c = mb_substr($s, $i, 1, $encoding);
        $a[] = (array_key_exists($c, $GREEK_TO_ALPHA)
                ? $GREEK_TO_ALPHA[$c]
                : $c);
    }

    return implode('', $a);
}
/**
 * \brief Version identifier of this module.
 *
 * @return string the version number followed by its release date
 */
function version() {
    $version = '03.00.03 --- June 17, 2020';

    return $version;
}
// File-level return: the include/require expression that loads this file
// evaluates to true.
return true;
?>