1. 修复代码块级别特征相似度大于100%的bug

without_nacos
liuzongren 6 months ago
parent 7d037191fa
commit bb737f8613
  1. 9
      src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
  2. 4
      src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
  3. 2
      src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
  4. 1
      src/main/java/com/keyware/composeanalysis/util/SolrUtils.java
  5. 4
      src/main/resources/application-dev.yaml

@@ -172,7 +172,6 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
return;
}
//保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数
Set<String> matchingTraitLineSet = new HashSet<>();
@@ -190,8 +189,11 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
matchCodeBlockLineCount += traitsFeatureMd5AndFeatureLineNumMap.get(matchFeatureMd5);
}
+ //特征行总数
+ int totalFeatureLineCount = traitsFeatureMd5AndFeatureLineNumMap.values().stream().mapToInt(Integer::intValue).sum();
//计算文件的总体特征相似度
- BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+ BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//计算文件的总体开源率
BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
@@ -344,12 +346,13 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
}
/**
- * 或者特征代码块的md5 当前md5包含的特征行数
+ * 获取每个特征代码块包含的特征行数
*
* @param codeBlockInfos
* @return
*/
private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) {
//按照特征代码块的md5进行分组
Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5));
Map<String, Integer> resultMap = new HashMap<>();
for (String traitMd5 : traitMd5GroupMap.keySet()) {

@@ -291,7 +291,9 @@ public class FunctionAnalysisTask extends IAnalysisTask {
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
+ if (versionInfo == null){
+ throw new BusinessException("根据版本ID,未查询到相关的版本信息。versionId:" + openEntries.get("versionId"));
+ }
//组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setId(IdGenerator.uuid32())

@@ -76,7 +76,7 @@ public class PorjectAnalysisTask {
if (!matchedPrject) {
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
- //剩余没有匹配文件,用文件的md5去匹配solr库的versionTree
+ //剩余没有匹配文件,用文件的md5去批量匹配solr库的versionTree
if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
matchByFileMd5s(unMatchedFiles);
}

@@ -7,7 +7,6 @@ import lombok.Data;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;

@@ -8,7 +8,7 @@ spring:
mongodb:
uri: mongodb://127.0.0.1:27017/KEYSWAN
redis:
- host: 127.0.0.1
+ host: 172.16.36.7
port: 6379
password: 123456
datasource:
@@ -30,7 +30,7 @@ spring:
#solr检索库地址
solr:
- solrUrl: http://172.16.36.7:8993/solr/
+ solrUrl: http://172.16.36.5:8993/solr/
#批量匹配时,返回的匹配数量
row: 5

Loading…
Cancel
Save