* @package Alphanormalize * * * \mainpage alphanormalize_php * Simple functions to remove "accents" and replace non-alphanumeric characters. * See alphanormalize.inc file. * * * * The main function is similar to alphanormalize function of the online JavaScript application * http://www.opimedia.be/DS/online-tools/txt2/ * * [alphanormalize_php] * *
* LGPL
* -----
* Copyright (C) 2013, 2016, 2020 Olivier Pirson
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* *
* © Olivier Pirson\n * www.opimedia.be\n * olivier.pirson.opi@gmail.com *
*/

namespace Alphanormalize;


/**
 * \brief Splits $s into the array of its characters.
 *
 * Characters are delimited according to $encoding;
 * if $encoding === null
 * then the internal character encoding (mb_internal_encoding()) is used.
 *
 * @internal Shared helper of the public functions of this module.
 *
 * @param string $s
 * @param null|string $encoding
 *
 * @return array list of strings, one per character of $s, in order
 */
function _mb_chars($s, $encoding) {
    if ($encoding === null) {
        // Omitting the encoding argument of the mb_* functions means "internal encoding";
        // resolving it once here allows a single code path for both cases.
        $encoding = mb_internal_encoding();
    }

    $chars = array();
    $len = mb_strlen($s, $encoding);

    for ($i = 0; $i < $len; $i++) {
        $chars[] = mb_substr($s, $i, 1, $encoding);
    }

    return $chars;
}


/**
 * \brief Copy of $s in which each character that is a key of $table is replaced by its value.
 *
 * Characters that are not keys of $table are copied unchanged.
 *
 * @internal Shared helper of mb_str_accentalpha_to_alpha() and mb_str_greek_to_alpha().
 *
 * @param string $s
 * @param array $table associative table: character => replacement string
 * @param null|string $encoding
 *
 * @return string
 */
function _mb_translate($s, $table, $encoding) {
    $result = '';

    foreach (_mb_chars($s, $encoding) as $c) {
        $result .= (array_key_exists($c, $table)
                    ? $table[$c]
                    : $c);
    }

    return $result;
}


/**
 * \brief Copy of $s without "accents".
 *
 * Returns a copy of $s in which the "accented" characters were converted by removing their "accent"
 * (the converted characters are those of the associative table $ACCENTALPHA_TO_ALPHA
 * from accentalpha_to_alpha.inc file).
 * All other characters are copied unchanged.
 *
 * For example: 'Élément' => 'Element'.
 *
 * If $encoding === null
 * then use the internal character encoding.
 *
 * @param string $s
 * @param null|string $encoding
 *
 * @return string
 */
function mb_str_accentalpha_to_alpha($s, $encoding=null) {
#DEBUG
    // Expression assertions: string assertions are no longer evaluated by PHP 8.
    assert(is_string($s));
    assert(($encoding === null) || is_string($encoding));
#DEBUG_END

    // Defines the local table $ACCENTALPHA_TO_ALPHA.
    require 'accentalpha_to_alpha.inc';

    return _mb_translate($s, $ACCENTALPHA_TO_ALPHA, $encoding);
}


/**
 * \brief
 * Copy of $s without "accents",
 * with the characters of the Greek alphabet replaced
 * and with all other non-alphanumeric characters replaced by $replacement.
 *
 * Returns a copy of $s in which:
 * - each ASCII alphanumeric character (0-9, A-Z, a-z) is kept unchanged ;
 * - each "accented" character is converted by removing its "accent"
 *   (the converted characters are those of the associative table $ACCENTALPHA_TO_ALPHA
 *   from accentalpha_to_alpha.inc file) ;
 * - each character of the Greek alphabet is converted to alphabetic characters
 *   (the converted characters are those of the associative table $GREEK_TO_ALPHA
 *   from greek_to_alpha.inc file) ;
 * - each group of consecutive other characters is replaced by one $replacement.
 *
 * For example: 'Élément ; α and ω.' => 'Element_a_and_o_'.
 *
 * Adopts the standard ONU/ELOT: see http://www.opimedia.be/DS/mementos/grecs.htm .
 *
 * If $strip
 * then begins by deleting HTML tags (with strip_tags()).
 *
 * If $entity_decode
 * then begins by converting HTML entities to normal characters
 * (with html_entity_decode() and the ENT_COMPAT flag,
 * producing characters in the same encoding as the one used to read $s).
 * (Before PHP 5.4, not all HTML entities are supported!)
 *
 * If $encoding === null
 * then use the internal character encoding.
 *
 * @param string $s
 * @param bool $strip
 * @param bool $entity_decode
 * @param string $replacement
 * @param null|string $encoding
 *
 * @return string
 */
function mb_str_alphanormalize($s, $strip=false, $entity_decode=false, $replacement='_', $encoding=null) {
#DEBUG
    // Expression assertions: string assertions are no longer evaluated by PHP 8.
    assert(is_string($s));
    assert(is_bool($strip));
    assert(is_bool($entity_decode));
    assert(is_string($replacement));
    assert(($encoding === null) || is_string($encoding));
#DEBUG_END

    // Define the local tables $ACCENTALPHA_TO_ALPHA and $GREEK_TO_ALPHA.
    require 'accentalpha_to_alpha.inc';
    require 'greek_to_alpha.inc';

    if ($strip) {  // delete HTML tags
        $s = strip_tags($s);
    }

    if ($entity_decode) {  // convert HTML entities to normal characters
        // Decode into the encoding that is then used to split $s into characters,
        // so that an explicit $encoding different from the internal one is honoured.
        $s = html_entity_decode($s, ENT_COMPAT,
                                ($encoding === null
                                 ? mb_internal_encoding()
                                 : $encoding));
    }

    $previous_replaced = false;  // true if the last appended piece is a $replacement, else false
    $result = '';

    foreach (_mb_chars($s, $encoding) as $c) {
        if ((('0' <= $c) && ($c <= '9'))
            || (('A' <= $c) && ($c <= 'Z'))
            || (('a' <= $c) && ($c <= 'z'))) {                // ASCII alphanumeric character
            $result .= $c;
            $previous_replaced = false;
        }
        elseif (array_key_exists($c, $ACCENTALPHA_TO_ALPHA)) {  // "accented" character -> 1 or 2 alphabetic characters
            $result .= $ACCENTALPHA_TO_ALPHA[$c];
            $previous_replaced = false;
        }
        elseif (array_key_exists($c, $GREEK_TO_ALPHA)) {        // Greek letter -> 1 or 2 alphabetic characters
            $result .= $GREEK_TO_ALPHA[$c];
            $previous_replaced = false;
        }
        elseif (!$previous_replaced) {                          // other character -> $replacement, only once per group
            $result .= $replacement;
            $previous_replaced = true;
        }
    }

    return $result;
}


/**
 * \brief Copy of $s with the characters of the Greek alphabet replaced.
 *
 * Returns a copy of $s in which the characters of the Greek alphabet were converted to alphabetic characters
 * (the converted characters are those of the associative table $GREEK_TO_ALPHA
 * from greek_to_alpha.inc file).
 * All other characters are copied unchanged.
 *
 * Adopts the standard ONU/ELOT: see http://www.opimedia.be/DS/mementos/grecs.htm .
 *
 * For example: 'α and ω' => 'a and o'.
 *
 * If $encoding === null
 * then use the internal character encoding.
 *
 * @param string $s
 * @param null|string $encoding
 *
 * @return string
 */
function mb_str_greek_to_alpha($s, $encoding=null) {
#DEBUG
    // Expression assertions: string assertions are no longer evaluated by PHP 8.
    assert(is_string($s));
    assert(($encoding === null) || is_string($encoding));
#DEBUG_END

    // Defines the local table $GREEK_TO_ALPHA.
    require 'greek_to_alpha.inc';

    return _mb_translate($s, $GREEK_TO_ALPHA, $encoding);
}


/**
 * \brief Return the version of this module.
 *
 * The returned string has the form 'version number --- date'.
 *
 * @return string
 */
function version() {
    return '03.00.03 --- June 17, 2020';
}


return true;

?>