1. 修复代码块级别特征相似度大于100%的bug

without_nacos
liuzongren 6 months ago
parent 7d037191fa
commit bb737f8613
  1. 9
      src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
  2. 4
      src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
  3. 2
      src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
  4. 1
      src/main/java/com/keyware/composeanalysis/util/SolrUtils.java
  5. 4
      src/main/resources/application-dev.yaml

@ -172,7 +172,6 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
return; return;
} }
//保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数 //保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数
Set<String> matchingTraitLineSet = new HashSet<>(); Set<String> matchingTraitLineSet = new HashSet<>();
@ -190,8 +189,11 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
matchCodeBlockLineCount += traitsFeatureMd5AndFeatureLineNumMap.get(matchFeatureMd5); matchCodeBlockLineCount += traitsFeatureMd5AndFeatureLineNumMap.get(matchFeatureMd5);
} }
//特征行总数
int totalFeatureLineCount = traitsFeatureMd5AndFeatureLineNumMap.values().stream().mapToInt(Integer::intValue).sum();
//计算文件的总体特征相似度 //计算文件的总体特征相似度
BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//计算文件的总体开源率 //计算文件的总体开源率
BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
@ -344,12 +346,13 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
} }
/** /**
* 或者特征代码块的md5 当前md5包含的特征行数 * 获取每个特征代码块包含的特征行数
* *
* @param codeBlockInfos * @param codeBlockInfos
* @return * @return
*/ */
private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) { private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) {
//按照特征代码块的md5进行分组
Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5)); Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5));
Map<String, Integer> resultMap = new HashMap<>(); Map<String, Integer> resultMap = new HashMap<>();
for (String traitMd5 : traitMd5GroupMap.keySet()) { for (String traitMd5 : traitMd5GroupMap.keySet()) {

@ -291,7 +291,9 @@ public class FunctionAnalysisTask extends IAnalysisTask {
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath"); String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
if (versionInfo == null){
throw new BusinessException("根据版本ID,未查询到相关的版本信息。versionId:" + openEntries.get("versionId"));
}
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setId(IdGenerator.uuid32()) matchOpenFileInfo.setId(IdGenerator.uuid32())

@ -76,7 +76,7 @@ public class PorjectAnalysisTask {
if (!matchedPrject) { if (!matchedPrject) {
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5(); List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
//剩余没有匹配文件,用文件的md5去匹配solr库的versionTree //剩余没有匹配文件,用文件的md5去批量匹配solr库的versionTree
if (CollectionUtils.isNotEmpty(unMatchedFiles)) { if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
matchByFileMd5s(unMatchedFiles); matchByFileMd5s(unMatchedFiles);
} }

@ -7,7 +7,6 @@ import lombok.Data;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;

@ -8,7 +8,7 @@ spring:
mongodb: mongodb:
uri: mongodb://127.0.0.1:27017/KEYSWAN uri: mongodb://127.0.0.1:27017/KEYSWAN
redis: redis:
host: 127.0.0.1 host: 172.16.36.7
port: 6379 port: 6379
password: 123456 password: 123456
datasource: datasource:
@ -30,7 +30,7 @@ spring:
#solr检索库地址 #solr检索库地址
solr: solr:
solrUrl: http://172.16.36.7:8993/solr/ solrUrl: http://172.16.36.5:8993/solr/
#批量匹配时,返回的匹配数量 #批量匹配时,返回的匹配数量
row: 5 row: 5

Loading…
Cancel
Save