ElasticSearch 纯度和匹配度排序实现

  • Post author:
  • Post category:其他


wildcardQuery 
GaussDecayFunctionBuilder 高斯函数比对



1、纯度越高:需求是Formulation中包含的Trade的纯度越高


该配方包含此trade,且国家是140的更靠前,并且包含当前trade的纯度越高越靠前

// Purity ranking: products that contain the current trade are matched, products released in
// country 140 are boosted, and products with fewer ingredients (higher purity) score higher.
EsQuery query = new EsQuery();
// Fall back to a page of 10 hits when the caller did not supply a size.
if (size == null) {
    query.setPager(0, 10);
} else {
    query.setPager(0, size);
}
query.includes("id", "tradeName", "prd", "coverImgId", "coverImgSrc", "ecoStatus", "base64Image");

// Decay-function parameters:
//   origin - the ideal field value, which earns the full score (1.0); here: exactly one INCI.
//   offset - values within this distance of the origin still earn the full score.
//   scale  - once a value is past origin + offset the score starts to decay; scale controls how fast.
//   decay  - the score a value gets at distance `scale` from the offset boundary (default 0.5).
double origin = 1, offset = 0, scale = 20, decay = 0.5;

// 1. Exponential decay on inciCount: products containing only the current ingredient rank first.
ExponentialDecayFunctionBuilder functionBuilder = ScoreFunctionBuilders
        .exponentialDecayFunction("inciCount", origin, scale, offset, decay)
        .setWeight(100f);

// 2. The product must contain the trade; releasedCountryId 140 (the dictionary id for
//    "Asia Pacific", per the original note) is an optional clause that only raises the score.
BoolQueryBuilder countryBuilder = QueryBuilders.boolQuery()
        .must(QueryBuilders.termsQuery("inci_ids_arr", tradeMap.get("id")))
        .should(QueryBuilders.matchQuery("releasedCountryId", 140).boost(25));

// Combine both signals: query score + decay-function score.
FunctionScoreQueryBuilder scoreQueryBuilder = QueryBuilders
        .functionScoreQuery(countryBuilder, functionBuilder)
        .boostMode(CombineFunction.SUM);
query.boolQueryBuilder(scoreQueryBuilder);
query.orderByDesc("_score", "hitNum", "id");

// Restrict hits to what the current user and role may see.
EsQueryUtil.addTargetUserAndRole(query);
EsResult result = elasticsearchService.createSearch(EsIndexConstant.es_basf_product, query, Map.class);
resultMaps = (List<Map<String, String>>) result.getData();

// The image depends only on the current trade's ecoStatus, so resolve it once instead of
// repeating the identical mapper lookup for every hit.
// NOTE(review): every hit receives the image of tradeMap's ecoStatus, not of its own
// "ecoStatus" field - confirm this is intended.
Object ecoStatus = tradeMap.get("ecoStatus");
if (ecoStatus != null && resultMaps != null && !resultMaps.isEmpty()) {
    String base64Image = tradeMapper.getbase64ImageByEco(Integer.valueOf(ecoStatus.toString()));
    for (Map<String, String> map : resultMaps) {
        map.put("base64Image", base64Image);
    }
}
return resultMaps;




2、相似度越高:Trade中和当前Trade包含的成分,越多匹配越靠前


// Similarity ranking: the more of the requested function claims a document carries, the higher
// it scores. Slot 0 awards 32 points when the document matches any requested claim; each further
// slot awards another 32 points for one specific claim. The weights are summed (ScoreMode.SUM)
// and replace the query score (CombineFunction.REPLACE).
if (StrUtil.isNotBlank(formulationQuery.getFunctionClaimId()) && !"0".equals(formulationQuery.getFunctionClaimId())) {
    List<Integer> claimIdList = CollUtil.convertToListFromString(formulationQuery.getFunctionClaimId(), ",");
    FunctionScoreQueryBuilder.FilterFunctionBuilder[] filterFunctionBuilders =
            new FunctionScoreQueryBuilder.FilterFunctionBuilder[claimIdList.size() + 1];

    // Slot 0: "matches at least one of the requested claims".
    WeightBuilder anyClaimWeight = new WeightBuilder();
    anyClaimWeight.setWeight(32f);
    TermsQueryBuilder claimsQuery = QueryBuilders.termsQuery("functionClaimIdArray", claimIdList);
    filterFunctionBuilders[0] = new FunctionScoreQueryBuilder.FilterFunctionBuilder(claimsQuery, anyClaimWeight);

    // Slots 1..n: one filter per requested claim. A running index is used instead of
    // claimIdList.indexOf(claimId): with a duplicate id, indexOf returns the first position
    // twice, so one slot is overwritten and another stays null.
    int slot = 1;
    for (Integer claimId : claimIdList) {
        WeightBuilder singleClaimWeight = new WeightBuilder();
        singleClaimWeight.setWeight(32f);
        TermQueryBuilder singleClaimQuery = QueryBuilders.termQuery("functionClaimIdArray", claimId);
        filterFunctionBuilders[slot++] =
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(singleClaimQuery, singleClaimWeight);
    }

    boolQueryBuilder.should(QueryBuilders.functionScoreQuery(filterFunctionBuilders)
            .scoreMode(FunctionScoreQuery.ScoreMode.SUM)
            .boostMode(CombineFunction.REPLACE));
}



3、除了上述两种方式之外,有些算法需要在筛选之后,根据多个条件的匹配情况调整排序得分,这时可以使用 script 排序

1.第一优先级:子分类相同

2.第三优先级:product form相同

3.第四优先级:product aspect相同

4.第五优先级:SPF相同

5.第六优先级:PA相同

以上条件相同时,按点击数从高到低

"sort": [
  {
    "_script": {
      "script": {
        "source": "def total = _score; if (doc['countryId'].value == 45) { total += 0.5; } if (doc['formId'].value == 80) { total += 0.3; } return total",
        "lang": "painless"
      },
      "type": "number",
      "order": "desc"
    }
  },
  {
    "_score": {
      "order": "desc"
    }
  },
  {
    "hitNum": {
      "order": "desc"
    }
  },
  {
    "id": {
      "order": "desc"
    }
  }
]

/**
 * Adds a descending Painless script sort to {@code esQuery} that starts from the relevance
 * score and adds a fixed bonus for every filter attribute of {@code formulationQuery} that the
 * document matches (formId +0.5, spfId +0.3, aspectId +0.2, paId +0.1).
 *
 * <p>An attribute that is {@code null} or {@code 0} on the query contributes nothing. Each
 * comparison is guarded with {@code doc[field].size() != 0} so documents that have no value
 * for the field are skipped instead of breaking the script, and the script ends with an
 * explicit {@code return total;} so the computed value is what the sort actually uses.
 *
 * @param esQuery          query wrapper that receives the script sort
 * @param formulationQuery filter values selected by the caller
 */
public void fillEsFilterSortScore(EsQuery esQuery, FormulationQuery formulationQuery){
    StringBuilder scriptStr = new StringBuilder("def total =_score;");
    // NOTE(review): the bonuses are additive, so spf + aspect (0.5) ties with form alone (0.5);
    // confirm this matches the intended priority order.
    if (formulationQuery.getFormId() != null && formulationQuery.getFormId() != 0) {
        scriptStr.append(bonusClause("formId", formulationQuery.getFormId(), "0.5"));
    }
    if (formulationQuery.getSpfId() != null && formulationQuery.getSpfId() != 0) {
        scriptStr.append(bonusClause("spfId", formulationQuery.getSpfId(), "0.3"));
    }
    if (formulationQuery.getAspectId() != null && formulationQuery.getAspectId() != 0) {
        scriptStr.append(bonusClause("aspectId", formulationQuery.getAspectId(), "0.2"));
    }
    if (formulationQuery.getPaId() != null && formulationQuery.getPaId() != 0) {
        scriptStr.append(bonusClause("paId", formulationQuery.getPaId(), "0.1"));
    }
    // Without an explicit return the accumulated total is never handed back to the sort.
    scriptStr.append("return total;");

    Script script = new Script(scriptStr.toString());
    ScriptSortBuilder scriptSortBuilder = SortBuilders.scriptSort(script, ScriptSortBuilder.ScriptSortType.NUMBER);
    scriptSortBuilder.order(SortOrder.DESC);
    esQuery.sort(scriptSortBuilder);
}

/** Builds one Painless clause that adds {@code bonus} when the document's {@code field} equals {@code value}. */
private static String bonusClause(String field, Object value, String bonus) {
    // size() != 0 guards documents that carry no value for the field.
    return "if(doc['" + field + "'].size() != 0 && doc['" + field + "'].value == " + value
            + ") {total+=" + bonus + ";}";
}

在这个版本的改造过程中发现:第二天这个 script 竟然失效了,原因是有的 doc 里 formId 为空。因此补充修改:只有字段不为空时,才进行逻辑判断并加分。

{
  "_script": {
    "script": {
      "source": "double formV=0,spfV = 0, aspectV=0,paV = 0;if(doc['formId'].size()!=0){if(doc['formId'].value==80){formV=1.0}}if(doc['aspectId'].size()!=0){if(doc['aspectId'].value==98){aspectV=0.2}}return formV+aspectV+spfV+paV;",
      "lang": "painless"
    },
    "type": "number",
    "order": "desc"
  }
},



版权声明:本文为Yuli_li原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。