搜索需求:拼音搜索、汉字词组、英文词组、数字搜索
过滤字符:截取前50个字为关键词,超过长度的文本不计入有效关键词
匹配策略:
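按以下顺序依次判断,命中第一条即采用对应查询:
1. 关键词仅为一个汉字:对 name、content 字段做 match 查询
2. 关键词全部由小写拼音全拼音节(可含空格)组成:拼音字段(.pinyin)match 查询 + 原字段通配符(wildcard)匹配
3. 关键词仅含英文字母、数字、空格:原字段 match 查询 + 通配符(wildcard)匹配
4. 中文及其他情况:中文分词字段(.words)match 查询 + 原字段 match 查询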
实现代码
/** Keywords longer than this are truncated before searching. */
private static final int MAX_KEYWORD_LENGTH = 50;

/** Upper bound (and default) for the number of content characters returned per hit. */
private static final int MAX_CONTENT_LIMIT = 500;

/** Maximum number of hits requested from Elasticsearch. */
private static final int MAX_HITS = 300;

/** Sub-field suffix queried when the keyword looks like pinyin. */
private static final String PINYIN_SUFFIX = ".pinyin";

/** Sub-field suffix queried for Chinese / mixed keywords. */
private static final String WORDS_SUFFIX = ".words";

/** Exactly one character in the range U+4E00..U+9FA5. */
private static final Pattern SINGLE_HANZI = Pattern.compile("^[\\u4e00-\\u9fa5]");

/** At least one word character somewhere in the keyword. */
private static final Pattern WORD_CHARS = Pattern.compile("\\w+");

/**
 * A sequence of whitespace and lower-case full pinyin syllables.
 * The leading {@code (?!a-z)} lookahead only rejects input starting with the literal text
 * "a-z", which the rest of the pattern could never match anyway; it is kept unchanged.
 */
private static final Pattern PINYIN_SYLLABLES = Pattern.compile("^(?!a-z)(\\s|a[io]?|ou?|e[inr]?|ang?|ng|[bmp](a[io]?|[aei]ng?|ei|ie?|ia[no]|o|u)|pou|me|m[io]u|[fw](a|[ae]ng?|ei|o|u)|fou|wai|[dt](a[io]?|an|e|[aeio]ng|ie?|ia[no]|ou|u[ino]?|uan)|dei|diu|[nl](a[io]?|ei?|[eio]ng|i[eu]?|i?ang?|iao|in|ou|u[eo]?|ve?|uan)|nen|lia|lun|[ghk](a[io]?|[ae]ng?|e|ong|ou|u[aino]?|uai|uang?)|[gh]ei|[jqx](i(ao?|ang?|e|ng?|ong|u)?|u[en]?|uan)|([csz]h?|r)([ae]ng?|ao|e|i|ou|u[ino]?|uan)|[csz](ai?|ong)|[csz]h(ai?|uai|uang)|zei|[sz]hua|([cz]h|r)ong|y(ao?|[ai]ng?|e|i|ong|ou|u[en]?|uan)){1,}");

/** Only ASCII letters, digits and whitespace. */
private static final Pattern ALNUM_OR_SPACE = Pattern.compile("^[a-zA-Z0-9\\s]{1,}$");

/**
 * Searches the {@code name} and {@code content} fields of the index for the given keywords.
 *
 * <p>The keyword is trimmed and cut to {@value #MAX_KEYWORD_LENGTH} characters, then one of
 * four queries is chosen, checked in this order:
 * <ol>
 *   <li>a single Chinese character: match on {@code name} / {@code content};</li>
 *   <li>lower-case pinyin syllables (and whitespace): match on the {@code .pinyin}
 *       sub-fields plus wildcard on the plain fields;</li>
 *   <li>ASCII letters, digits and whitespace: match plus wildcard on the plain fields;</li>
 *   <li>anything else: match on the plain fields and on the {@code .words} sub-fields.</li>
 * </ol>
 * Every query is additionally restricted to documents whose {@code _id} is one of the keys
 * returned by {@code fsFileService.getOtherColFidRoleMap(uid)} or whose {@code owner}
 * equals {@code uid}.
 *
 * @param keywords     raw search text; null, empty or blank yields an empty result
 * @param nameLimit    maximum name length in the result; null, negative or larger than
 *                     {@code Constants.MAX_FILE_NAME_LENGTH} falls back to that constant
 * @param contentLimit maximum content length in the result; null, negative or larger than
 *                     {@value #MAX_CONTENT_LIMIT} falls back to {@value #MAX_CONTENT_LIMIT}
 * @param uid          id of the searching user, used in the access filter
 * @return {@code ResponseBuilder.success} wrapping the list of {@code _source} objects with
 *         {@code name} / {@code content} replaced by highlighted or truncated text, or
 *         {@code ResponseBuilder.fail()} when the request fails with an {@link IOException}
 */
@Override
public RtData search(String keywords, Integer nameLimit, Integer contentLimit, Long uid) {
    List<JSONObject> list = new ArrayList<>();
    if (StringUtils.isEmpty(keywords)) {
        return ResponseBuilder.success(list);
    }
    keywords = keywords.trim();
    // A whitespace-only keyword cannot match anything; skip the round trip.
    if (keywords.isEmpty()) {
        return ResponseBuilder.success(list);
    }
    if (keywords.length() > MAX_KEYWORD_LENGTH) {
        keywords = keywords.substring(0, MAX_KEYWORD_LENGTH);
    }
    // Negative limits would make substring(0, limit) throw later on, so they get the default too.
    if (nameLimit == null || nameLimit < 0 || nameLimit > Constants.MAX_FILE_NAME_LENGTH) {
        nameLimit = Constants.MAX_FILE_NAME_LENGTH;
    }
    if (contentLimit == null || contentLimit < 0 || contentLimit > MAX_CONTENT_LIMIT) {
        contentLimit = MAX_CONTENT_LIMIT;
    }

    // Encode the keyword as a complete JSON string literal (quotes included). Escaping only
    // the double quote is not enough: a backslash in the keyword would otherwise let the
    // user break out of the literal and alter the query, including the access filter.
    String quotedKeywords = JSON.toJSONString(keywords);
    String quotedWildcard =
            JSON.toJSONString("*" + keywords.toLowerCase(java.util.Locale.ROOT) + "*");

    Map<String, String> fidRoleMap = fsFileService.getOtherColFidRoleMap(uid);
    List<String> fidList = new ArrayList<>();
    if (fidRoleMap != null) {
        fidList.addAll(fidRoleMap.keySet());
    }

    String match = "";
    List<String> shouldClauses = new ArrayList<>();
    if (SINGLE_HANZI.matcher(keywords).matches()) {
        // One Chinese character.
        shouldClauses.add(matchClause("name", quotedKeywords, 2));
        shouldClauses.add(matchClause("content", quotedKeywords, 1));
    } else if (WORD_CHARS.matcher(keywords).find() && PINYIN_SYLLABLES.matcher(keywords).matches()) {
        // Pinyin.
        match = PINYIN_SUFFIX;
        shouldClauses.add(matchClause("name" + match, quotedKeywords, 2));
        shouldClauses.add(matchClause("content" + match, quotedKeywords, 1));
        shouldClauses.add(wildcardClause("name", quotedWildcard, 2));
        shouldClauses.add(wildcardClause("content", quotedWildcard, 1));
    } else if (ALNUM_OR_SPACE.matcher(keywords).matches()) {
        // English letters, digits, whitespace.
        shouldClauses.add(matchClause("name", quotedKeywords, 2));
        shouldClauses.add(matchClause("content", quotedKeywords, 1));
        shouldClauses.add(wildcardClause("content", quotedWildcard, 2));
        shouldClauses.add(wildcardClause("name", quotedWildcard, 4));
    } else {
        // Chinese and everything else.
        match = WORDS_SUFFIX;
        shouldClauses.add(matchClause("name", quotedKeywords, 2));
        shouldClauses.add(matchClause("content", quotedKeywords, 1));
        shouldClauses.add(matchClause("name" + match, quotedKeywords, 5));
        shouldClauses.add(matchClause("content" + match, quotedKeywords, 2));
    }

    // Highlight the plain fields always, and the analysed sub-fields when one is queried.
    List<String> highlightFields = new ArrayList<>();
    highlightFields.add(highlightField("name", nameLimit * 2));
    highlightFields.add(highlightField("content", contentLimit * 2));
    if (!match.isEmpty()) {
        highlightFields.add(highlightField("name" + match, nameLimit * 2));
        highlightFields.add(highlightField("content" + match, contentLimit * 2));
    }

    String queryStr = "{\"highlight\":{\"fields\":{" + String.join(",", highlightFields) + "}},"
            + "\"query\":{\"bool\":{\"must\":["
            + "{\"bool\":{\"should\":[" + String.join(",", shouldClauses) + "]}},"
            + "{\"bool\":{\"should\":[{\"terms\":{\"_id\":" + JSON.toJSONString(fidList) + "}},"
            + "{\"term\":{\"owner\":" + uid + "}}]}}"
            + "]}},\"size\":" + MAX_HITS + "}";
    log.info(queryStr);

    Request request = new Request("GET", "/" + indexName + "/type/_search");
    try {
        request.setEntity(new NStringEntity(queryStr, ContentType.APPLICATION_JSON));
        Response response = restClient.performRequest(request);
        String res = EntityUtils.toString(response.getEntity());
        log.info(res);
        JSONArray hits = JSON.parseObject(res).getJSONObject("hits").getJSONArray("hits");
        for (int i = 0; i < hits.size(); i++) {
            JSONObject hit = hits.getJSONObject(i);
            JSONObject source = hit.getJSONObject("_source");
            JSONObject highlight = hit.getJSONObject("highlight");
            if (highlight != null) {
                applyHighlight(source, highlight, "name", match, nameLimit);
                applyHighlight(source, highlight, "content", match, contentLimit);
            } else {
                truncateField(source, "name", nameLimit);
                truncateField(source, "content", contentLimit);
            }
            list.add(source);
        }
        return ResponseBuilder.success(list);
    } catch (IOException e) {
        log.error("search request failed", e);
    }
    return ResponseBuilder.fail();
}

/** Builds one entry of the highlight "fields" object for the given field. */
private static String highlightField(String field, int fragmentSize) {
    return "\"" + field + "\":{\"fragment_size\":" + fragmentSize + "}";
}

/** Builds a match clause; {@code quotedQuery} must already be a JSON string literal. */
private static String matchClause(String field, String quotedQuery, int boost) {
    return "{\"match\":{\"" + field + "\":{\"query\":" + quotedQuery + ",\"boost\":" + boost + "}}}";
}

/** Builds a wildcard clause; {@code quotedPattern} must already be a JSON string literal. */
private static String wildcardClause(String field, String quotedPattern, int boost) {
    return "{\"wildcard\":{\"" + field + "\":{\"value\":" + quotedPattern + ",\"boost\":" + boost + "}}}";
}

/**
 * Replaces {@code source[field]} with the highlighted text for that field, or with the
 * newline-free, truncated original value when no highlight fragment is available.
 */
private void applyHighlight(JSONObject source, JSONObject highlight, String field, String match, Integer limit) {
    JSONArray fragments;
    boolean stripTags = false;
    if (PINYIN_SUFFIX.equals(match)) {
        // Prefer the plain-field highlight; fall back to the pinyin one without its tags.
        fragments = highlight.getJSONArray(field);
        if (fragments == null) {
            stripTags = true;
            fragments = highlight.getJSONArray(field + match);
        }
    } else {
        // Prefer the analysed sub-field highlight; fall back to the plain field.
        fragments = highlight.getJSONArray(field + match);
        if (fragments == null) {
            fragments = highlight.getJSONArray(field);
        }
    }

    if (fragments != null && !fragments.isEmpty()) {
        String text = "";
        for (Object fragment : fragments) {
            text = StringUtils.isEmpty(text) ? (String) fragment : (text + "..." + fragment);
        }
        text = text.replace("\n", " ");
        if (stripTags) {
            // NOTE(review): presumably pinyin highlights do not line up with the original
            // characters, so the <em> markers are dropped - confirm.
            text = text.replace("<em>", "").replace("</em>", "");
        }
        source.put(field, getHighlightContent(text, limit));
        return;
    }

    String raw = source.getString(field);
    if (raw != null) {
        raw = raw.replace("\n", " ");
        source.put(field, raw.length() > limit ? raw.substring(0, limit) : raw);
    }
}

/** Cuts {@code source[field]} down to {@code limit} characters when it is longer. */
private static void truncateField(JSONObject source, String field, int limit) {
    String raw = source.getString(field);
    if (raw != null && raw.length() > limit) {
        source.put(field, raw.substring(0, limit));
    }
}