|
|
@ -2,12 +2,14 @@ package com.keyware.composeanalysis.task; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.collection.CollectionUtil; |
|
|
|
import cn.hutool.core.collection.CollectionUtil; |
|
|
|
|
|
|
|
import cn.hutool.core.lang.Pair; |
|
|
|
|
|
|
|
import cn.hutool.core.util.StrUtil; |
|
|
|
|
|
|
|
import com.keyware.common.constant.enums.AnalysisStatusEnum; |
|
|
|
import com.keyware.composeanalysis.constant.FixedValue; |
|
|
|
import com.keyware.composeanalysis.constant.FixedValue; |
|
|
|
import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst; |
|
|
|
import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst; |
|
|
|
import com.keyware.composeanalysis.constant.RedisConst; |
|
|
|
import com.keyware.composeanalysis.constant.RedisConst; |
|
|
|
import com.keyware.composeanalysis.constant.SolrDBConst; |
|
|
|
import com.keyware.composeanalysis.constant.SolrDBConst; |
|
|
|
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
|
|
|
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
|
|
|
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
|
|
|
|
|
|
|
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
|
|
|
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
|
|
|
import com.keyware.composeanalysis.entity.AnalysisTask; |
|
|
|
import com.keyware.composeanalysis.entity.AnalysisTask; |
|
|
|
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
|
|
@ -15,10 +17,7 @@ import com.keyware.composeanalysis.mongo.LineDataMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
|
|
|
import com.keyware.composeanalysis.solr.VersionTree; |
|
|
|
import com.keyware.composeanalysis.solr.VersionTree; |
|
|
|
import com.keyware.composeanalysis.util.AnalysisLogUtil; |
|
|
|
import com.keyware.composeanalysis.util.*; |
|
|
|
import com.keyware.composeanalysis.util.RedisUtil; |
|
|
|
|
|
|
|
import com.keyware.composeanalysis.util.SolrUtils; |
|
|
|
|
|
|
|
import com.keyware.composeanalysis.util.SpringContextUtils; |
|
|
|
|
|
|
|
import com.keyware.keyswan.anaysis.Analysis; |
|
|
|
import com.keyware.keyswan.anaysis.Analysis; |
|
|
|
import com.keyware.keyswan.anaysis.AnalysisFactory; |
|
|
|
import com.keyware.keyswan.anaysis.AnalysisFactory; |
|
|
|
import com.keyware.keyswan.common.CodeFile; |
|
|
|
import com.keyware.keyswan.common.CodeFile; |
|
|
@ -34,6 +33,7 @@ import java.math.BigDecimal; |
|
|
|
import java.math.RoundingMode; |
|
|
|
import java.math.RoundingMode; |
|
|
|
import java.util.*; |
|
|
|
import java.util.*; |
|
|
|
import java.util.concurrent.CountDownLatch; |
|
|
|
import java.util.concurrent.CountDownLatch; |
|
|
|
|
|
|
|
import java.util.stream.Collectors; |
|
|
|
|
|
|
|
|
|
|
|
import static org.springframework.data.mongodb.core.query.Criteria.where; |
|
|
|
import static org.springframework.data.mongodb.core.query.Criteria.where; |
|
|
|
|
|
|
|
|
|
|
@ -84,37 +84,23 @@ public class LineAnalysisTask extends IAnalysisTask { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//获取文件地址
|
|
|
|
//获取文件地址
|
|
|
|
String filePath = analysisFile.getFileUrl(); |
|
|
|
String filePath = analysisFile.getFileUrl(); |
|
|
|
//获取文件名称
|
|
|
|
//获取文件名称
|
|
|
|
String fileName = analysisFile.getName(); |
|
|
|
String fileName = analysisFile.getName(); |
|
|
|
|
|
|
|
|
|
|
|
AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】正在提取" + fileName); |
|
|
|
AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】正在提取" + fileName); |
|
|
|
try { |
|
|
|
try { |
|
|
|
LineDataMongoDto lineDataMongoDto = new LineDataMongoDto(); |
|
|
|
|
|
|
|
lineDataMongoDto.setFileId(analysisFile.getId()) |
|
|
|
|
|
|
|
.setStatus(0) |
|
|
|
|
|
|
|
.setIsSelect(false); |
|
|
|
|
|
|
|
Analysis analysis = AnalysisFactory.getAnalysis(filePath); |
|
|
|
Analysis analysis = AnalysisFactory.getAnalysis(filePath); |
|
|
|
CodeFile codeFile = null; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//获取文件行级特征md5
|
|
|
|
//获取文件行级特征md5
|
|
|
|
codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT); |
|
|
|
CodeFile codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT); |
|
|
|
//每一行原内容MD5值集合
|
|
|
|
|
|
|
|
// String cutFileLineMd5 = codeFile.getCutFileLineMd5();
|
|
|
|
|
|
|
|
//每一行特征内容MD5值集合
|
|
|
|
|
|
|
|
String traitFileLineMd5 = codeFile.getTraitFileLineMd5(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
String[] featureMd5Arr = {}; |
|
|
|
|
|
|
|
if (StringUtils.isNotBlank(traitFileLineMd5)) { |
|
|
|
|
|
|
|
featureMd5Arr = traitFileLineMd5.split(","); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
List<String> lineFeatures = Arrays.asList(featureMd5Arr); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//从solr中获取特征相似的 文件
|
|
|
|
//从solr中获取特征相似的 文件
|
|
|
|
SolrDocumentList featureSimilarityFromSolr = getFeatureSimilarityFromSolr(lineFeatures); |
|
|
|
SolrDocumentList featureSimilarityFromSolr = getFeatureSimilarityFromSolr(codeFile); |
|
|
|
|
|
|
|
|
|
|
|
//计算文件的开源率
|
|
|
|
//计算文件的开源率
|
|
|
|
calculateOpenRate(featureSimilarityFromSolr, lineFeatures); |
|
|
|
doAnalysis(featureSimilarityFromSolr, codeFile); |
|
|
|
|
|
|
|
|
|
|
|
//更新文件表的分析状态为3 行级特征以分析完毕
|
|
|
|
//更新文件表的分析状态为3 行级特征以分析完毕
|
|
|
|
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); |
|
|
|
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); |
|
|
@ -123,11 +109,12 @@ public class LineAnalysisTask extends IAnalysisTask { |
|
|
|
.replaceWith(analysisFile) |
|
|
|
.replaceWith(analysisFile) |
|
|
|
.findAndReplace(); |
|
|
|
.findAndReplace(); |
|
|
|
|
|
|
|
|
|
|
|
AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】提取完成" + fileName); |
|
|
|
//插入日志
|
|
|
|
|
|
|
|
AnalysisLogUtil.insert(mongoTemplate, "【行级分析】完成" + fileName); |
|
|
|
log.info("文件" + fileName + ":行级分析完成"); |
|
|
|
log.info("文件" + fileName + ":行级分析完成"); |
|
|
|
} catch (Exception e) { |
|
|
|
} catch (Exception e) { |
|
|
|
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【行级特征提取】提取失败" + fileName, e); |
|
|
|
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【行级分析】失败" + fileName, e); |
|
|
|
log.error("文件:" + fileName + "行级别特征提取失败!", e); |
|
|
|
log.error("文件:{}行级别分析失败!", fileName,e); |
|
|
|
//修改当前文件分析状态未失败
|
|
|
|
//修改当前文件分析状态未失败
|
|
|
|
mongoTemplate.update(FileDataMongoDto.class) |
|
|
|
mongoTemplate.update(FileDataMongoDto.class) |
|
|
|
.matching(where("_id").is(analysisFile.getId())) |
|
|
|
.matching(where("_id").is(analysisFile.getId())) |
|
|
@ -143,100 +130,193 @@ public class LineAnalysisTask extends IAnalysisTask { |
|
|
|
* 计算开源率 被测件的开源率 |
|
|
|
* 计算开源率 被测件的开源率 |
|
|
|
* |
|
|
|
* |
|
|
|
* @param matcheOpenSourceFiles |
|
|
|
* @param matcheOpenSourceFiles |
|
|
|
* @param lineFeatures |
|
|
|
* @param codeFile 文件解析结果 |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
private void calculateOpenRate(SolrDocumentList matcheOpenSourceFiles, List<String> lineFeatures) { |
|
|
|
private void doAnalysis(SolrDocumentList matcheOpenSourceFiles, CodeFile codeFile) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//根据文件后缀判断需要查询的文件版本库名称
|
|
|
|
|
|
|
|
String versionIdCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); |
|
|
|
|
|
|
|
|
|
|
|
if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) { |
|
|
|
if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) { |
|
|
|
|
|
|
|
//因为行的特征库较少,这里补充一个对比逻辑,如果当前文件解析失败,或者没有通过代码块匹配到数据,则直接通过文件的md5 再次查询一次solr库
|
|
|
|
|
|
|
|
checkByOriginalFileMd5(versionIdCoreName, analysisFile.getMd5()); |
|
|
|
return; |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//根据文件后缀判断需要查询的文件版本库名称
|
|
|
|
|
|
|
|
String versionIdCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//保存所有匹配的行数信息,方便统计总的匹配行数
|
|
|
|
|
|
|
|
Set<String> matchedFeatureMd5 = new HashSet<>(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//保存所有匹配的行数信息,方便统计总的匹配行数
|
|
|
|
|
|
|
|
Set<Integer> matchLineRowsNum = new HashSet<>(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//获取文件总特征行数
|
|
|
|
|
|
|
|
String traitFileLineMd5 = codeFile.getTraitFileLineMd5(); |
|
|
|
|
|
|
|
List<String> lineFeatureList = Arrays.asList(traitFileLineMd5.split(",")); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//统计每个文件的开源率
|
|
|
|
|
|
|
|
List<MatchOpenFile> matchOpenFilesRes = calculateSimilarityAndOpenRate(matcheOpenSourceFiles, codeFile, versionIdCoreName, matchLineRowsNum, matchedFeatureMd5); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//计算文件的总体特征相似度
|
|
|
|
|
|
|
|
BigDecimal featureSimilarity = new BigDecimal(matchedFeatureMd5.size()).divide(new BigDecimal(lineFeatureList.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//计算文件的总体开源率
|
|
|
|
|
|
|
|
BigDecimal openRate = new BigDecimal(matchLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//获取开源率的阈值
|
|
|
|
|
|
|
|
Integer openRateThreshold = analysisTask.getOpenRateThreshold(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//如果开源率大于阈值,则将当前文件设置成开源
|
|
|
|
|
|
|
|
if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) { |
|
|
|
|
|
|
|
analysisFile.setOpenType(true); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//定义结果集对象
|
|
|
|
//保存当前文件的开源信息到mongo库中
|
|
|
|
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); |
|
|
|
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); |
|
|
|
matchOpenFileMongo.setId(IdGenerator.uuid32()) |
|
|
|
matchOpenFileMongo.setId(IdGenerator.uuid32()) |
|
|
|
.setFilePath(analysisFile.getFileUrl()) |
|
|
|
.setFilePath(analysisFile.getFileUrl()) |
|
|
|
.setFileName(analysisFile.getName()); |
|
|
|
.setFileName(analysisFile.getName()) |
|
|
|
|
|
|
|
.setFeatureSimilarity(featureSimilarity.floatValue()) |
|
|
|
|
|
|
|
.setOpenRate(openRate.floatValue()) |
|
|
|
|
|
|
|
.setOpenType(analysisFile.getOpenType()) |
|
|
|
|
|
|
|
.setMatchOpenFile(matchOpenFilesRes); |
|
|
|
|
|
|
|
log.info("文件" + analysisFile.getName() + ":开源率:" + openRate.floatValue() + ",特征相似度:" + featureSimilarity.floatValue()); |
|
|
|
|
|
|
|
mongoTemplate.save(matchOpenFileMongo); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//开源文件信息保存结果集
|
|
|
|
|
|
|
|
List<MatchOpenFile> matchOpenFileInfoList = new ArrayList<>(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//保存所有匹配的行数信息,方便统计总的匹配行数
|
|
|
|
/** |
|
|
|
Set<String> matchingLineSet = new HashSet<>(); |
|
|
|
* 计算当前文件的特征相似度 和 开源率 |
|
|
|
|
|
|
|
* |
|
|
|
//获取文件总行数
|
|
|
|
* @param matchOpenFiles 通过MD5 匹配到的所有开源文件 |
|
|
|
BigDecimal totalCodeRowNum = new BigDecimal(analysisFile.getCodeRowNum()); |
|
|
|
* @param sourceFileBaseCoreName 当前文件特征文件的 solr coreName |
|
|
|
|
|
|
|
* @param matchLineRowsNum 所有开源文件匹配到的开源行号列表 |
|
|
|
//统计每个开源文件和被测件的匹配行数
|
|
|
|
* @param matchFeatureLineMd5s 所有开源文件匹配到的特征行MD5 |
|
|
|
for (SolrDocument matchFile : matcheOpenSourceFiles) { |
|
|
|
* @return 匹配的开源文件解析后的结果集 |
|
|
|
//解析文件的代码块特征值
|
|
|
|
*/ |
|
|
|
String lineFeatureMd5s = (String) matchFile.get("tz_line_hay"); |
|
|
|
private List<MatchOpenFile> calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set<Integer> matchLineRowsNum, Set<String> matchFeatureLineMd5s) { |
|
|
|
List<String> matchedLineFeatures = Arrays.asList(lineFeatureMd5s.split(",")); |
|
|
|
|
|
|
|
|
|
|
|
//匹配的开源文件列表
|
|
|
|
//匹配的总行数
|
|
|
|
List<MatchOpenFile> matchOpenFilesRes = new ArrayList<>(); |
|
|
|
int currentFileMatchLineCount = 0; |
|
|
|
|
|
|
|
|
|
|
|
//首先根据文件的MD5查询开源文件的版本ID,和路径信息
|
|
|
|
//遍历当前文件的代码块特征,统计匹配的总行数
|
|
|
|
Set<String> openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet()); |
|
|
|
for (String originalLineFeatureMd5 : lineFeatures) { |
|
|
|
Map<String, SolrDocument> md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s); |
|
|
|
for (String matchLineFeatureMd5 : matchedLineFeatures) { |
|
|
|
|
|
|
|
if (originalLineFeatureMd5.equals(matchLineFeatureMd5)) { |
|
|
|
//根据版本ID查询版本的详细信息
|
|
|
|
currentFileMatchLineCount++; |
|
|
|
//todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versioinTree 后面需要优化
|
|
|
|
matchingLineSet.add(originalLineFeatureMd5); |
|
|
|
Set<String> openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet()); |
|
|
|
|
|
|
|
List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds); |
|
|
|
|
|
|
|
Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity())); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
String traitFileLineMd5 = fileAnalysisRes.getTraitFileLineMd5(); |
|
|
|
|
|
|
|
List<String> lineFeatureList = Arrays.asList(traitFileLineMd5.split(",")); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (SolrDocument openSourceFile : matchOpenFiles) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//开源文件MD5
|
|
|
|
|
|
|
|
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//解析文件的特征行
|
|
|
|
|
|
|
|
String lineFeatureMd5s = (String) openSourceFile.get("tz_line_hay"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
List<String> openFileLineFeatures = Arrays.asList(lineFeatureMd5s.split(",")); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//获取开源文件的文本信息
|
|
|
|
|
|
|
|
String openSourceContent = solrUtils.getOpenFileContentByMd5(openSourceFileMd5); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//当前文件匹配特征行总行数
|
|
|
|
|
|
|
|
int currentFileMatchFeatureLineCount = 0; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//遍历函数特征MD5
|
|
|
|
|
|
|
|
for (String lineFeatureMd5 : lineFeatureList) { |
|
|
|
|
|
|
|
//源文件的特征行列表
|
|
|
|
|
|
|
|
for (String openFileLineFeature : openFileLineFeatures) { |
|
|
|
|
|
|
|
if (lineFeatureMd5.equals(openFileLineFeature)) { |
|
|
|
|
|
|
|
matchFeatureLineMd5s.add(lineFeatureMd5); |
|
|
|
|
|
|
|
currentFileMatchFeatureLineCount++; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//首先根据文件的MD5查询开源文件的版本ID,和路径信息
|
|
|
|
//当前文件的开源率
|
|
|
|
SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + matchFile.get("sourceMd5"), "versionId,fullPath,sourceFileMd5"); |
|
|
|
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent); |
|
|
|
|
|
|
|
|
|
|
|
//根据版本ID查询版本的详细信息
|
|
|
|
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
|
|
|
|
//todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versioinTree 后面需要优化
|
|
|
|
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); |
|
|
|
VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId")); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//计算与当前开源文件的开源率
|
|
|
|
//统计当前文件的特征相似度
|
|
|
|
BigDecimal openRate = new BigDecimal(currentFileMatchLineCount).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); |
|
|
|
BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(new BigDecimal(lineFeatureList.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); |
|
|
|
|
|
|
|
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); |
|
|
|
|
|
|
|
if (versionInfo == null) { |
|
|
|
|
|
|
|
log.error("根据版本ID,未查询到相关的版本信息。versionId:{}", openEntries.get("versionId")); |
|
|
|
|
|
|
|
continue; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//当前开源文件的开源项目信息
|
|
|
|
//组装当前开源文件的开源项目信息
|
|
|
|
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); |
|
|
|
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); |
|
|
|
matchOpenFileInfo.setPId(versionInfo.getProId()) |
|
|
|
matchOpenFileInfo.setPId(versionInfo.getProId()) |
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
.setSourceUrl(versionInfo.getDownUrl()) |
|
|
|
.setSourceUrl((String) openEntries.get("fullPath")) |
|
|
|
.setOpenRate(openRate.floatValue()) |
|
|
|
.setFeatureSimilarity(featureSimilarity.floatValue()) |
|
|
|
|
|
|
|
.setOpenRate(openRateAndSaveRowNum.getKey()) |
|
|
|
.setVersion(versionInfo.getVersionName()) |
|
|
|
.setVersion(versionInfo.getVersionName()) |
|
|
|
.setLicenseType(versionInfo.getLicenseType()) |
|
|
|
.setLicenseType(versionInfo.getLicenseType()) |
|
|
|
.setAnalyzeType(AnalysisLevelEnum.LINE_LEVEL.getCode()); |
|
|
|
.setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()); |
|
|
|
matchOpenFileInfoList.add(matchOpenFileInfo); |
|
|
|
matchOpenFilesRes.add(matchOpenFileInfo); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return matchOpenFilesRes; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//统计当前文件的整体开源率
|
|
|
|
|
|
|
|
BigDecimal openRate = new BigDecimal(matchingLineSet.size()).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//获取开源率的阈值
|
|
|
|
|
|
|
|
Integer openRateThreshold = analysisTask.getOpenRateThreshold(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//如果开源率大于阈值,则将当前文件设置成开源
|
|
|
|
/** |
|
|
|
if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) { |
|
|
|
* 防止代码块特征库不全,再次根据文件MD5查询开源文件信息, 做二次校验 |
|
|
|
analysisFile.setOpenType(true); |
|
|
|
* |
|
|
|
} |
|
|
|
* @param originalFileMd5 |
|
|
|
|
|
|
|
* @param versionIdCoreName |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
private void checkByOriginalFileMd5(String versionIdCoreName, String originalFileMd5) { |
|
|
|
|
|
|
|
|
|
|
|
//保存当前文件的开源信息
|
|
|
|
//根据文件的MD5,查询特征库,看当前文件是否在开源代码库中
|
|
|
|
matchOpenFileMongo.setOpenType(analysisFile.getOpenType()) |
|
|
|
SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + originalFileMd5, "versionId,fullPath,sourceFileMd5"); |
|
|
|
.setMatchOpenFile(matchOpenFileInfoList); |
|
|
|
|
|
|
|
mongoTemplate.save(matchOpenFileMongo); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (versionIdAndPath != null) { |
|
|
|
|
|
|
|
//根据版本ID查询版本的详细信息
|
|
|
|
|
|
|
|
VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId")); |
|
|
|
|
|
|
|
if (versionInfo != null) { |
|
|
|
|
|
|
|
//当前开源文件的开源项目信息
|
|
|
|
|
|
|
|
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); |
|
|
|
|
|
|
|
matchOpenFileInfo.setPId(versionInfo.getProId()) |
|
|
|
|
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
|
|
|
|
.setSourceUrl(versionInfo.getDownUrl()) |
|
|
|
|
|
|
|
.setFeatureSimilarity(100.00f) |
|
|
|
|
|
|
|
.setOpenRate(100.00f) |
|
|
|
|
|
|
|
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//保存当前文件的开源信息到mongo库中
|
|
|
|
|
|
|
|
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); |
|
|
|
|
|
|
|
matchOpenFileMongo.setId(IdGenerator.uuid32()) |
|
|
|
|
|
|
|
.setFilePath(analysisFile.getFileUrl()) |
|
|
|
|
|
|
|
.setFileName(analysisFile.getName()) |
|
|
|
|
|
|
|
.setOpenRate(100.00f) |
|
|
|
|
|
|
|
.setOpenType(analysisFile.getOpenType()) |
|
|
|
|
|
|
|
.setMatchOpenFile(Arrays.asList(matchOpenFileInfo)); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mongoTemplate.save(matchOpenFileMongo); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|
* 将特征值插入到mongo库中 |
|
|
|
* 将特征值插入到mongo库中 |
|
|
|
* |
|
|
|
|
|
|
|
* @param features 特征集合 |
|
|
|
* @param features 特征集合 |
|
|
|
* @param lineDataMongoDto 当前分析任务 ,特征信息存储 |
|
|
|
* @param lineDataMongoDto 当前分析任务 ,特征信息存储 |
|
|
|
* todo 后期 看看有没有插入的必要 |
|
|
|
* todo 后期 看看有没有插入的必要 |
|
|
|
* @param |
|
|
|
* @param |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
@Deprecated |
|
|
|
@Deprecated |
|
|
@ -281,16 +361,31 @@ public class LineAnalysisTask extends IAnalysisTask { |
|
|
|
/** |
|
|
|
/** |
|
|
|
* 根据 特征值 从特征库中检索 具有特征相似的 |
|
|
|
* 根据 特征值 从特征库中检索 具有特征相似的 |
|
|
|
* |
|
|
|
* |
|
|
|
* @param lineFeatureList 行特征信息 |
|
|
|
* @param codeFile 行特征信息 |
|
|
|
* @return |
|
|
|
* @return |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
private SolrDocumentList getFeatureSimilarityFromSolr(List<String> lineFeatureList) { |
|
|
|
private SolrDocumentList getFeatureSimilarityFromSolr(CodeFile codeFile) { |
|
|
|
String solrCoreName = SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP; |
|
|
|
Set<String> queryMd5Set = new HashSet<>(); |
|
|
|
|
|
|
|
//每一行原内容MD5值集合
|
|
|
|
|
|
|
|
String cutFileLineMd5 = codeFile.getCutFileLineMd5(); |
|
|
|
|
|
|
|
if (StrUtil.isNotBlank(cutFileLineMd5)) { |
|
|
|
|
|
|
|
List<String> lineCutList = Arrays.asList(cutFileLineMd5.split(",")); |
|
|
|
|
|
|
|
queryMd5Set.addAll(lineCutList); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
//每一行特征内容MD5值集合
|
|
|
|
|
|
|
|
String traitFileLineMd5 = codeFile.getTraitFileLineMd5(); |
|
|
|
|
|
|
|
if (StrUtil.isNotBlank(traitFileLineMd5)) { |
|
|
|
|
|
|
|
List<String> lineFeatureList = Arrays.asList(traitFileLineMd5.split(",")); |
|
|
|
|
|
|
|
queryMd5Set.addAll(lineFeatureList); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (CollectionUtil.isEmpty(queryMd5Set)) { |
|
|
|
|
|
|
|
log.error("特征相似度检索失败,特征为空:{}", analysisFile.getName()); |
|
|
|
|
|
|
|
return new SolrDocumentList(); |
|
|
|
|
|
|
|
} |
|
|
|
//拼接行特征查询条件
|
|
|
|
//拼接行特征查询条件
|
|
|
|
String queryStr = "tz_line_hay:(" + StringUtils.join(lineFeatureList, " OR ") + ")"; |
|
|
|
String queryStr = "tz_line_hay:(" + StringUtils.join(queryMd5Set, " OR ") + ")"; |
|
|
|
log.info("查询条件: solrCoreName:{},queryStr:{}", solrCoreName, queryStr); |
|
|
|
log.info("查询条件: solrCoreName:{},queryStr:{}", SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP, queryStr); |
|
|
|
SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,tz_line_hay"); |
|
|
|
SolrDocumentList result = solrUtils.query(SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP, queryStr, "sourceMd5,tz_line_hay"); |
|
|
|
log.info("查询结果: result:{}", result); |
|
|
|
|
|
|
|
return result; |
|
|
|
return result; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|