1. 修复代码块级别特征相似度大于 100% 的 bug

without_nacos
liuzongren 7 months ago
parent 7d037191fa
commit bb737f8613
  1. 4
      src/main/java/com/keyware/composeanalysis/config/GlobalExceptionHandler.java
  2. 9
      src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
  3. 10
      src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
  4. 2
      src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
  5. 1
      src/main/java/com/keyware/composeanalysis/util/SolrUtils.java
  6. 4
      src/main/resources/application-dev.yaml

@ -16,7 +16,7 @@ public class GlobalExceptionHandler {
//全局异常处理 //全局异常处理
@ExceptionHandler(value = Exception.class) @ExceptionHandler(value = Exception.class)
public Result defaultErrorHandler(Exception e) { public Result defaultErrorHandler(Exception e) {
log.error("全局异常信息,ex={}",e.getMessage(),e); log.error("全局异常信息,ex={}", e.getMessage(), e);
return Result.fail(ResultCode.FAIL.getCode(), e.getMessage()); return Result.fail(ResultCode.FAIL.getCode(), e.getMessage());
} }
@ -25,7 +25,7 @@ public class GlobalExceptionHandler {
@ExceptionHandler(value = BusinessException.class) @ExceptionHandler(value = BusinessException.class)
@ResponseBody @ResponseBody
public Result businessExceptionHandler(BusinessException e) { public Result businessExceptionHandler(BusinessException e) {
log.error("业务异常信息",e); log.error("业务异常信息", e);
return Result.fail(e.getCode(), e.getMsg()); return Result.fail(e.getCode(), e.getMsg());
} }

@ -172,7 +172,6 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
return; return;
} }
//保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数 //保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数
Set<String> matchingTraitLineSet = new HashSet<>(); Set<String> matchingTraitLineSet = new HashSet<>();
@ -190,8 +189,11 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
matchCodeBlockLineCount += traitsFeatureMd5AndFeatureLineNumMap.get(matchFeatureMd5); matchCodeBlockLineCount += traitsFeatureMd5AndFeatureLineNumMap.get(matchFeatureMd5);
} }
//特征行总数
int totalFeatureLineCount = traitsFeatureMd5AndFeatureLineNumMap.values().stream().mapToInt(Integer::intValue).sum();
//计算文件的总体特征相似度 //计算文件的总体特征相似度
BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//计算文件的总体开源率 //计算文件的总体开源率
BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
@ -344,12 +346,13 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
} }
/** /**
* 或者特征代码块的md5 当前md5包含的特征行数 * 获取每个特征代码块包含的特征行数
* *
* @param codeBlockInfos * @param codeBlockInfos
* @return * @return
*/ */
private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) { private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) {
//按照特征代码块的md5进行分组
Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5)); Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5));
Map<String, Integer> resultMap = new HashMap<>(); Map<String, Integer> resultMap = new HashMap<>();
for (String traitMd5 : traitMd5GroupMap.keySet()) { for (String traitMd5 : traitMd5GroupMap.keySet()) {

@ -106,8 +106,8 @@ public class FunctionAnalysisTask extends IAnalysisTask {
//根据文件的名称获取函数解析器 //根据文件的名称获取函数解析器
Analysis analysis = AnalysisFactory.getAnalysis(fileName); Analysis analysis = AnalysisFactory.getAnalysis(fileName);
if (analysis == null){ if (analysis == null) {
throw new BusinessException("获取文件解析器失败,文件名称:"+fileName); throw new BusinessException("获取文件解析器失败,文件名称:" + fileName);
} }
//解析文件 //解析文件
@ -238,7 +238,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
//被测件文本内容 //被测件文本内容
String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl()); String sourcefileContent = FileUtil.readUtf8String(analysisFile.getFileUrl());
//将文本内容解析成行信息,用于后续文件的开源率计算 //将文本内容解析成行信息,用于后续文件的开源率计算
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent); List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
@ -291,7 +291,9 @@ public class FunctionAnalysisTask extends IAnalysisTask {
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath"); String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
if (versionInfo == null){
throw new BusinessException("根据版本ID,未查询到相关的版本信息。versionId:" + openEntries.get("versionId"));
}
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setId(IdGenerator.uuid32()) matchOpenFileInfo.setId(IdGenerator.uuid32())

@ -76,7 +76,7 @@ public class PorjectAnalysisTask {
if (!matchedPrject) { if (!matchedPrject) {
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5(); List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
//剩余没有匹配文件,用文件的md5去匹配solr库的versionTree //剩余没有匹配文件,用文件的md5去批量匹配solr库的versionTree
if (CollectionUtils.isNotEmpty(unMatchedFiles)) { if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
matchByFileMd5s(unMatchedFiles); matchByFileMd5s(unMatchedFiles);
} }

@ -7,7 +7,6 @@ import lombok.Data;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;

@ -8,7 +8,7 @@ spring:
mongodb: mongodb:
uri: mongodb://127.0.0.1:27017/KEYSWAN uri: mongodb://127.0.0.1:27017/KEYSWAN
redis: redis:
host: 127.0.0.1 host: 172.16.36.7
port: 6379 port: 6379
password: 123456 password: 123456
datasource: datasource:
@ -30,7 +30,7 @@ spring:
#solr检索库地址 #solr检索库地址
solr: solr:
solrUrl: http://172.16.36.7:8993/solr/ solrUrl: http://172.16.36.5:8993/solr/
#批量匹配时,返回的匹配数量 #批量匹配时,返回的匹配数量
row: 5 row: 5

Loading…
Cancel
Save