ThinkPHP 3 使用 phpanalysis 插件增加分词权重搜索功能

  • Post author:
  • Post category:php


搜索及分词效果可媲美百度。


注意:一定要删除插件原版自带的词库,只添加自己网站常用的词即可,否则搜索会很卡。


演示地址

http://www.microphotons.cn/


在这里插入图片描述

一、先在 ThinkPHP\Library\Vendor\ 目录中安装 phpanalysis 插件(即 WordAnalysis 目录)

插件下载地址https://download.csdn.net/download/viqecel/86157012

目录结构如下

二,在搜索控制器中引用插件

// Load the WordAnalysis (phpanalysis) library through ThinkPHP's Vendor() loader.
Vendor('WordAnalysis.Analysis');
$obj = new \WordAnalysis\Analysis();
$soso2= $obj::getKeywords($soso,12);// split the search text into at most 12 keywords

三,搜索代码示例

Vendor('WordAnalysis.Analysis');
$obj = new \WordAnalysis\Analysis();
// Split the search text into at most 12 keywords; the code below expects
// them back as one comma-separated string.
$soso2 = $obj::getKeywords($soso, 12);

// Keep only non-blank keywords. A blank keyword would become LIKE '%%' and
// match every row. Comparing against '' (instead of a truthiness test) keeps
// a search for "0" valid.
$arr = array();
foreach (explode(',', (string) $soso2) as $term) {
    $term = trim($term);
    if ($term !== '') {
        $arr[] = $term;
    }
}

if (!$arr) {
    // Nothing to search for: render the page with an empty result list.
    $this->cp_list = '';
    $this->display();
    exit;
}

// Columns returned for every hit, and the fields that are searched.
$columns = '`id`,`iss`,`title`,`type`,`isindex`,`xinghao`,`price`,`price_hidden`,`huohao`,`jscs2`,`description`,`mycontent`,`ctime`,`pic`,`kucun`';
$searchFields = array('title', 'xinghao', 'huohao', 'description', 'mycontent', 'jscs2');

// Escapes a keyword for use inside a quoted LIKE pattern: first the LIKE
// wildcards (% _ and the escape character \), then the string-literal
// metacharacters. Keywords come from user input, so they must never reach
// the SQL text unescaped.
// NOTE(review): addslashes() is not safe for multi-byte connection charsets
// such as GBK; prefer the framework's parameter binding if it is available.
$escapeLike = function ($term) {
    return addslashes(addcslashes($term, '\\%_'));
};

// Builds "( `f1` LIKE '%kw%' OR `f2` LIKE '%kw%' ... )" for an escaped keyword.
$buildWhere = function ($escaped) use ($searchFields) {
    $conditions = array();
    foreach ($searchFields as $field) {
        $conditions[] = "`$field` LIKE '%{$escaped}%'";
    }
    return '( ' . implode(' OR ', $conditions) . ' )';
};

if (count($arr) > 1) {
    // Several keywords: one SELECT per keyword, merged with UNION (which also
    // removes duplicate rows). Ranking is done later in PHP.
    $selects = array();
    foreach ($arr as $so) {
        $selects[] = "SELECT $columns FROM `tg_contlist` WHERE " . $buildWhere($escapeLike($so));
    }
    // implode() avoids a trailing UNION, so no rtrim() character-mask trick is needed.
    $sql = implode(' UNION ', $selects) . ' limit 600 '; // cap the number of rows loaded
} else {
    // A single keyword: let the database rank the rows directly.
    $soso = $arr[0];
    $like = $escapeLike($soso);

    // field => array(sort alias, score for a prefix match, score for a match anywhere).
    // The array order is also the ORDER BY priority.
    $weights = array(
        'title'       => array('titlesort', 400, 300),
        'xinghao'     => array('xinghaosort', 100, 90),
        'huohao'      => array('huohaosort', 70, 60),
        'description' => array('descriptionsort', 50, 45),
        'mycontent'   => array('dessort', 40, 35),
        'jscs2'       => array('jscs2sort', 40, 35),
    );

    $scoreColumns = array();
    $orderBy = array();
    foreach ($weights as $field => $weight) {
        list($alias, $prefixScore, $containsScore) = $weight;
        $scoreColumns[] = "((case when `$field` like '{$like}%' then $prefixScore else 0 end)+"
            . "(case when `$field` like '%{$like}%' then $containsScore else 0 end)) as $alias";
        $orderBy[] = "$alias desc";
    }

    $sql = "SELECT $columns," . implode(',', $scoreColumns)
        . ' FROM `tg_contlist` WHERE ' . $buildWhere($like)
        . ' ORDER BY ' . implode(',', $orderBy) . ' limit 600';
}

	// Run the search query. Anything that is not an array (for example a
	// failed query) is treated as "no matches" so the loops below stay safe.
	$result = M()->query($sql);
	if (!is_array($result)) {
		$result = array();
	}

	// Non-empty keywords only: an empty needle is invalid for stripos() and
	// an empty regex alternative would match at every position.
	$terms = array();
	foreach ($arr as $term) {
		if ((string) $term !== '') {
			$terms[] = (string) $term;
		}
	}
	$multi = count($arr) > 1;

	// Strips HTML and collapses runs of three / two spaces into one space.
	$plainText = function ($html) {
		$text = strip_tags((string) $html);
		$text = str_replace('   ', ' ', $text);
		return str_replace('  ', ' ', $text);
	};

	// Wraps every keyword occurrence in <font color=red>, case-insensitively,
	// keeping the original casing of the text. All keywords are applied in ONE
	// pass and only to text outside of tags, so a keyword such as "red" or
	// "font" can no longer corrupt markup inserted for another keyword.
	$highlight = function ($html, array $keywords) {
		$html = (string) $html;
		// Longest keyword first so the alternation prefers the most specific match.
		usort($keywords, function ($a, $b) {
			return strlen($b) - strlen($a);
		});
		$quoted = array();
		foreach ($keywords as $keyword) {
			if ((string) $keyword !== '') {
				// preg_quote() keeps keywords containing / ( ) . etc. from breaking the pattern.
				$quoted[] = preg_quote((string) $keyword, '/');
			}
		}
		if (!$quoted || $html === '') {
			return $html;
		}
		$alternatives = '(' . implode('|', $quoted) . ')';
		// Odd entries are tags (the captured delimiters), even entries are text.
		$pieces = preg_split('/(<[^>]*>)/', $html, -1, PREG_SPLIT_DELIM_CAPTURE);
		if ($pieces === false) {
			return $html;
		}
		foreach ($pieces as $i => $piece) {
			if ($i % 2 === 1 || $piece === '') {
				continue;
			}
			// "u" makes matching UTF-8 aware; fall back to byte matching when
			// the text is not valid UTF-8 (preg_replace() then returns null).
			$marked = preg_replace('/' . $alternatives . '/iu', '<font color=red>\\1</font>', $piece);
			if ($marked === null) {
				$marked = preg_replace('/' . $alternatives . '/i', '<font color=red>\\1</font>', $piece);
			}
			if ($marked !== null) {
				$pieces[$i] = $marked;
			}
		}
		return implode('', $pieces);
	};

	if ($multi) {
		// Relevance score per row: each keyword adds 10000 for a title hit,
		// 1000 for xinghao, 100 for description, 10 for the tag-stripped
		// mycontent and 1 for the tag-stripped jscs2.
		foreach ($result as $k => $row) {
			$title = (string) $row['title'];
			$model = (string) $row['xinghao'];
			$summary = (string) $row['description'];
			// Strip tags once per row instead of once per keyword.
			$content = strip_tags((string) $row['mycontent']);
			$specs = strip_tags((string) $row['jscs2']);

			$score = 0;
			foreach ($terms as $term) {
				// stripos() !== false instead of stristr() truthiness: a match
				// whose remainder is "0" must still count as a hit.
				if (stripos($title, $term) !== false) {
					$score += 10000;
				}
				if (stripos($model, $term) !== false) {
					$score += 1000;
				}
				if (stripos($summary, $term) !== false) {
					$score += 100;
				}
				if (stripos($content, $term) !== false) {
					$score += 10;
				}
				if (stripos($specs, $term) !== false) {
					$score += 1;
				}
			}
			$result[$k]['sort'] = $score;
		}

		if ($result) {
			// Sort the rows by their score, highest first.
			$scores = array_column($result, 'sort');
			array_multisort($scores, SORT_DESC, $result);
		}
	}

	// With one keyword the database already ordered the rows; only $soso is highlighted.
	$highlightTerms = $multi ? $terms : array((string) $soso);
	$highlightedFields = array('title', 'xinghao', 'huohao', 'description');
	$copiedFields = array('isindex', 'iss', 'type', 'kucun', 'id', 'ctime', 'pic', 'price', 'price_hidden');

	$cp_list2 = array();
	foreach ($result as $k => $v) {
		$row = array();
		foreach ($highlightedFields as $field) {
			$row[$field] = $highlight($v[$field], $highlightTerms);
		}
		$row['mycontent'] = $highlight($plainText($v['mycontent']), $highlightTerms);

		$specsText = $plainText($v['jscs2']);
		if ($multi && $terms) {
			// Multi-keyword search: show an excerpt built by sub_key_h() for
			// the first keyword (it highlights that keyword itself), then
			// highlight the remaining keywords inside the excerpt.
			$excerpt = sub_key_h($specsText, $terms[0], 150, 'utf-8');
			$row['jscs2'] = $highlight($excerpt, array_slice($terms, 1));
		} else {
			$row['jscs2'] = $highlight($specsText, $highlightTerms);
		}

		foreach ($copiedFields as $field) {
			$row[$field] = $v[$field];
		}
		$cp_list2[$k] = $row;
	}
		
		// 分页
// All matches are loaded and ranked in PHP first, so paging is emulated by
// slicing the in-memory list instead of using LIMIT in SQL.
$perPage = 5; // rows shown per page
$count = count($cp_list2);
$Page = new \Think\Page($count, $perPage);
$show = $Page->show();
// array_slice() returns the requested window without modifying its input.
// The previous nested array_splice() passed a function result by reference,
// which raises "Only variables should be passed by reference".
// $Page->firstRow is the offset of the first row of the current page.
$cp_list2 = array_slice($cp_list2, $Page->firstRow, $perPage);
$this->assign('page', $show);
$this->cp_list = $cp_list2;

下面是一个高亮函数:截取关键字左右两侧的部分内容,并将关键字高亮显示。

该函数需要放在 functionmy 自定义函数库中。这个函数可能用不到。

	/**
	 * Builds an excerpt of $str around the first occurrence of $key and wraps
	 * that occurrence in a red <font> tag.
	 *
	 * The keyword is located case-insensitively and shown with the casing it
	 * has in $str. The excerpt grows one character to the left and one to the
	 * right per step for ($len - keyword length) steps, so it can be longer
	 * than $len when text exists on both sides of the keyword.
	 *
	 * @param string $str Text to cut the excerpt from (expected to be plain text).
	 * @param string $key Keyword to locate and highlight.
	 * @param int    $len Number of growth steps plus the keyword length.
	 * @param string $enc Character encoding passed to the mb_* functions.
	 * @return string The excerpt with the keyword highlighted; when $key is
	 *                empty or does not occur in $str, the first $len
	 *                characters of $str without any highlighting.
	 */
	function sub_key_h($str,$key,$len=20,$enc='utf-8'){
		$str = (string) $str;
		$key = (string) $key;
		$strlen = mb_strlen($str,$enc);
		$keylen = mb_strlen($key,$enc);

		// An empty needle is invalid for the mb_* search functions, and a
		// missing keyword must not be shown as if it were part of the text.
		$keypos = $keylen > 0 ? mb_stripos($str,$key,0,$enc) : false;
		if($keypos === false){
			return mb_substr($str,0,$len,$enc);
		}

		// The matched text as it is written in $str (keeps its casing).
		$match = mb_substr($str,$keypos,$keylen,$enc);
		if($keylen > $len){
			// The keyword alone exceeds the budget: show only its beginning.
			return "<font style='color:red;'>".mb_substr($match,0,$len,$enc)."</font>...";
		}

		$leftpos = $keypos - 1;        // next character to take on the left
		$rightpos = $keypos + $keylen; // next character to take on the right
		$left = "";
		$right = "";
		for($i = 0;$i < $len - $keylen;$i++){
			if($leftpos >= 0){
				// Grow to the left, and to the right as well while text remains.
				$left = mb_substr($str,$leftpos--,1,$enc).$left;
				if($rightpos < $strlen){
					$right .= mb_substr($str,$rightpos++,1,$enc);
				}
			}elseif($rightpos < $strlen){
				// Left side exhausted: spend the step on the right side.
				$right .= mb_substr($str,$rightpos++,1,$enc);
			}
		}
		return $left."<font style='color:red;'>".$match."</font>".$right;
	}



版权声明:本文为viqecel原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。