PHP改进计算字符串相似度的函数similar_text()、levenshtein()
发布时间:2020-12-13 02:06:08 所属栏目:PHP教程 来源:网络整理
导读:《PHP实例:PHP改进计算字符串相似度的函数similar_text()、levenshtein()》要点: 本文介绍了PHP实例:PHP改进计算字符串相似度的函数similar_text()、levenshtein(),希望对您有用。如果有疑问,可以联系我们。 PHP教程 similar_text()中文汉字版 代码如下
《PHP实例:PHP改进计算字符串相似度的函数similar_text()、levenshtein()》要点:similar_text() 中文汉字版(按 UTF-8 字符而非字节进行比较)
代码如下:
<?php
// Split a string into an array of single UTF-8 characters.
//
// $str     the text to split (expected to be valid UTF-8).
// returns  a list of one-character strings; an empty array when the input
//          is empty or cannot be matched.
//
// Note: "." without the "s" modifier does not match a line feed, so line
// feeds in $str are skipped rather than returned as elements.
function split_str($str) {
    // The "u" modifier makes "." consume one whole UTF-8 character, not one byte.
    $matched = preg_match_all("/./u", $str, $arr);
    // preg_match_all() returns false on failure (for example malformed UTF-8);
    // fall back to an empty list instead of reading a missing array key.
    if ($matched === false || !isset($arr[0])) {
        return array();
    }
    return $arr[0];
}

// Similarity check for multibyte (e.g. Chinese) text.
//
// Counts how many DISTINCT characters of $str2 also occur in $str1.
// Character order and repetition are ignored, so this is a set-overlap
// measure rather than the block-matching algorithm of similar_text().
//
// $str1, $str2  the two UTF-8 strings to compare.
// returns       int, number of distinct characters common to both strings.
function similar_text_cn($str1, $str2) {
    $arr_1 = array_unique(split_str($str1));
    $arr_2 = array_unique(split_str($str2));
    // Distinct characters of $str2 minus those that never appear in $str1.
    $similarity = count($arr_2) - count(array_diff($arr_2, $arr_1));

    return $similarity;
}

// Article heading that followed this snippet on the source page:
// "PHP教程levenshtein()中文汉字版" (levenshtein() for Chinese text).
代码如下:
<?php
// Split a multibyte string into an array of single characters.
//
// $string    the text to split.
// $encoding  character encoding passed to the mb_* functions (default UTF-8).
// returns    a list of one-character strings, empty for an empty input.
function mbStringToArray($string, $encoding = 'UTF-8') {
    $arrayResult = array();
    $iLen = mb_strlen($string, $encoding);
    for ($i = 0; $i < $iLen; $i++) {
        // mb_substr() takes (string, start, length, encoding); the length
        // argument must be given explicitly so the encoding lands in the
        // fourth position.
        $arrayResult[] = mb_substr($string, $i, 1, $encoding);
    }
    return $arrayResult;
}

// Edit distance (Levenshtein) computed per character instead of per byte,
// so multibyte text such as Chinese is measured correctly.
//
// $str1, $str2   the two strings to compare.
// $costReplace   cost of replacing one character with another (default 1);
//                insertions and deletions always cost 1.
// $encoding      character encoding of both strings (default UTF-8).
// returns        int, the minimal total cost of turning $str1 into $str2.
function levenshtein_cn($str1, $str2, $costReplace = 1, $encoding = 'UTF-8') {
    $mb_str1 = mbStringToArray($str1, $encoding);
    $mb_str2 = mbStringToArray($str2, $encoding);
    $mb_len1 = count($mb_str1);
    $mb_len2 = count($mb_str2);

    // $d[i][j] = distance between the first i chars of $str1 and the
    // first j chars of $str2.
    $d = array();
    for ($i1 = 0; $i1 <= $mb_len1; $i1++) {
        $d[$i1] = array();
        $d[$i1][0] = $i1; // i1 single-character edits against an empty prefix of $str2
    }
    for ($i2 = 0; $i2 <= $mb_len2; $i2++) {
        $d[0][$i2] = $i2; // i2 single-character edits against an empty prefix of $str1
    }

    for ($i1 = 1; $i1 <= $mb_len1; $i1++) {
        for ($i2 = 1; $i2 <= $mb_len2; $i2++) {
            // Matching characters cost nothing; otherwise pay the replace cost.
            $cost = ($mb_str1[$i1 - 1] === $mb_str2[$i2 - 1]) ? 0 : $costReplace;
            $d[$i1][$i2] = min(
                $d[$i1 - 1][$i2] + 1,        // one-character edit (labelled "insert" in the original)
                $d[$i1][$i2 - 1] + 1,        // one-character edit (labelled "delete" in the original)
                $d[$i1 - 1][$i2 - 1] + $cost // replace or keep
            );
        }
    }
    return $d[$mb_len1][$mb_len2];
}

// Article heading that followed this snippet on the source page:
// "最长公共子序列LCS()" (longest common subsequence, LCS()).
代码如下:
<?php
// Longest common subsequence, byte-oriented version (suitable for
// single-byte text such as English).
//
// $str_1, $str_2  the two strings to compare.
// returns         int, length in bytes of the longest common subsequence.
function LCS_en($str_1, $str_2) {
    $len_1 = strlen($str_1);
    $len_2 = strlen($str_2);

    // $dp[i][j] = LCS length of the first i bytes of $str_1 and the first
    // j bytes of $str_2; row 0 and column 0 stay 0 (empty prefix).
    $dp = array_fill(0, $len_1 + 1, array_fill(0, $len_2 + 1, 0));

    for ($i = 1; $i <= $len_1; $i++) {
        for ($j = 1; $j <= $len_2; $j++) {
            if ($str_1[$i - 1] === $str_2[$j - 1]) {
                // Same byte: extend the subsequence found for both shorter prefixes.
                $dp[$i][$j] = $dp[$i - 1][$j - 1] + 1;
            } else {
                // Otherwise carry over the better of dropping one byte from either side.
                $dp[$i][$j] = max($dp[$i - 1][$j], $dp[$i][$j - 1]);
            }
        }
    }
    return $dp[$len_1][$len_2];
}

// Split a multibyte string into an array of single characters.
//
// $string    the text to split.
// $encoding  character encoding passed to the mb_* functions (default UTF-8).
// returns    a list of one-character strings, empty for an empty input.
function mbStringToArray($string, $encoding = 'UTF-8') {
    $arrayResult = array();
    $iLen = mb_strlen($string, $encoding);
    for ($i = 0; $i < $iLen; $i++) {
        $arrayResult[] = mb_substr($string, $i, 1, $encoding);
    }
    return $arrayResult;
}

// Longest common subsequence, character-oriented version for multibyte
// text such as Chinese.
//
// $str1, $str2  the two strings to compare.
// $encoding     character encoding of both strings (default UTF-8).
// returns       int, length in characters of the longest common subsequence.
function LCS_cn($str1, $str2, $encoding = 'UTF-8') {
    $mb_str1 = mbStringToArray($str1, $encoding);
    $mb_str2 = mbStringToArray($str2, $encoding);
    $mb_len1 = count($mb_str1);
    $mb_len2 = count($mb_str2);

    // Same table layout as LCS_en(), indexed by character instead of byte.
    $dp = array_fill(0, $mb_len1 + 1, array_fill(0, $mb_len2 + 1, 0));

    for ($i = 1; $i <= $mb_len1; $i++) {
        for ($j = 1; $j <= $mb_len2; $j++) {
            if ($mb_str1[$i - 1] === $mb_str2[$j - 1]) {
                $dp[$i][$j] = $dp[$i - 1][$j - 1] + 1;
            } else {
                $dp[$i][$j] = max($dp[$i - 1][$j], $dp[$i][$j - 1]);
            }
        }
    }
    return $dp[$mb_len1][$mb_len2];
}

// Source-page footer that followed this snippet (site boilerplate, not code):
// "编程之家培训学院每天发布《PHP实例:PHP改进计算字符串相似度的函数similar_text()、levenshtein()》等实战技能,
//  PHP、MYSQL、LINUX、APP、JS,CSS全面培养人才。 (编辑:李大同)
//  【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!"