|
|
|
@ -1,5 +1,6 @@ |
|
|
|
|
package com.keyware.composeanalysis.task; |
|
|
|
|
|
|
|
|
|
import cn.hutool.core.lang.Pair; |
|
|
|
|
import com.keyware.common.constant.enums.AnalysisStatusEnum; |
|
|
|
|
import com.keyware.composeanalysis.constant.FixedValue; |
|
|
|
|
import com.keyware.composeanalysis.constant.RedisConst; |
|
|
|
@ -101,7 +102,7 @@ public class FileAnalysisTask extends IAnalysisTask { |
|
|
|
|
SolrDocumentList openSourceFileList = solrUtils.query(featureCoreName, querySb, "sourceMd5"); |
|
|
|
|
//如果当前文件在源码库中,匹配到了数据,则统计当前文件的开源率
|
|
|
|
|
if (CollectionUtils.isNotEmpty(openSourceFileList)) { |
|
|
|
|
ananlyzeFileOpenRate(openSourceFileList); |
|
|
|
|
ananlyzeFileOpenRate(openSourceFileList,codeFile); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -132,18 +133,13 @@ public class FileAnalysisTask extends IAnalysisTask { |
|
|
|
|
/** |
|
|
|
|
* 分析文件的开源率 |
|
|
|
|
* |
|
|
|
|
* @param fileList 匹配的开源文件信息 |
|
|
|
|
* @param openSourceFileList 匹配的开源文件信息 |
|
|
|
|
* @throws IOException |
|
|
|
|
*/ |
|
|
|
|
private void ananlyzeFileOpenRate(SolrDocumentList fileList) throws IOException { |
|
|
|
|
//创建匹配开源文件信息匹配对象
|
|
|
|
|
MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto(); |
|
|
|
|
matchOpenFileInfo.setId(IdGenerator.uuid32()) |
|
|
|
|
.setFileName(analysisFile.getName()) |
|
|
|
|
.setFilePath(analysisFile.getFileUrl()); |
|
|
|
|
private void ananlyzeFileOpenRate(SolrDocumentList openSourceFileList,CodeFile fileAnalysisRes) throws IOException { |
|
|
|
|
|
|
|
|
|
//根据匹配的开源文件的md5 获取版本ID
|
|
|
|
|
Set<String> sourceFileMd5 = fileList.stream().map(solrDocument -> (String) solrDocument.get("sourceMd5")).collect(Collectors.toSet()); |
|
|
|
|
Set<String> sourceFileMd5 = openSourceFileList.stream().map(solrDocument -> (String) solrDocument.get("sourceMd5")).collect(Collectors.toSet()); |
|
|
|
|
String sourceCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); |
|
|
|
|
Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceCoreName, sourceFileMd5); |
|
|
|
|
|
|
|
|
@ -160,52 +156,14 @@ public class FileAnalysisTask extends IAnalysisTask { |
|
|
|
|
|
|
|
|
|
HashSet<Integer> openLineNum = new HashSet<>(); |
|
|
|
|
|
|
|
|
|
//开源文件结果集合
|
|
|
|
|
ArrayList<MatchOpenFile> matchOpenFileList = new ArrayList<>(); |
|
|
|
|
//遍历匹配到的开源文件列表
|
|
|
|
|
for (int i = 0; i < fileList.size(); i++) { |
|
|
|
|
String openFileMd5 = (String) fileList.get(i).get("sourceMd5"); |
|
|
|
|
SolrDocument versionObj = md5VersionObjMap.get(openFileMd5); |
|
|
|
|
String versionId = (String) versionObj.get("versionId"); |
|
|
|
|
VersionTree versionInfo = versionIdMap.get(versionId); |
|
|
|
|
if (versionInfo == null) { |
|
|
|
|
log.error("未在versionTree中找到版本信息,openFileMd5:{},versionId:{}",openFileMd5, versionId); |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
MatchOpenFile matchOpenFile = new MatchOpenFile(); |
|
|
|
|
matchOpenFile.setId(IdGenerator.uuid32()) |
|
|
|
|
.setVersionId(versionId) |
|
|
|
|
.setSourceFilePath((String) versionObj.get("fullPath")) |
|
|
|
|
.setSourceUrl(versionInfo.getDownUrl()) |
|
|
|
|
.setPId(versionInfo.getProId()) |
|
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
|
.setLicenseType(versionInfo.getLicenseType()) |
|
|
|
|
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()) |
|
|
|
|
.setVersion(versionInfo.getVersionName()) |
|
|
|
|
.setFeatureSimilarity(100.00f); |
|
|
|
|
//计算被测件和开源文件的文本相似度
|
|
|
|
|
//根据文件的MD5的第一位获取solr库索引名称
|
|
|
|
|
String solrNameIndex =openFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; |
|
|
|
|
SolrDocumentList sourceFileInfo = solrUtils.query(solrNameIndex, "sourceFileMd5:" + openFileMd5, "sourceContent"); |
|
|
|
|
if (CollectionUtils.isNotEmpty(sourceFileInfo)) { |
|
|
|
|
String openSourceContent = String.valueOf(sourceFileInfo.get(0).getFieldValue("sourceContent")); |
|
|
|
|
//这里存在优化空间,被测件的文件行拆分 可以拿到循环外面
|
|
|
|
|
double similarity = SimilarityUtil.getSimilarityAndSaveRowNum(fileLines, openSourceContent, openLineNum); |
|
|
|
|
matchOpenFile.setOpenRate(new BigDecimal(similarity * 100).setScale(2, RoundingMode.HALF_UP).floatValue()); |
|
|
|
|
//如果找不到源代码,直接将原文开源率置为 100%
|
|
|
|
|
} else { |
|
|
|
|
log.error("找不到源代码,DBname:{},sourceFileMd5:{}", solrNameIndex, openFileMd5); |
|
|
|
|
matchOpenFile.setOpenRate(100.00f); |
|
|
|
|
} |
|
|
|
|
matchOpenFile.setMd5(openFileMd5); |
|
|
|
|
matchOpenFileList.add(matchOpenFile); |
|
|
|
|
} |
|
|
|
|
List<MatchOpenFile> matchOpenFilesRes = calculateOpenRate(openSourceFileList,fileAnalysisRes,sourceCoreName,openLineNum); |
|
|
|
|
|
|
|
|
|
//统计被测件的总体开源率
|
|
|
|
|
|
|
|
|
|
//获取开源率阈值,判断当前文件是否开源
|
|
|
|
|
Integer openRateThreshold = analysisTask.getOpenRateThreshold(); |
|
|
|
|
int openLineCount = openLineNum.size(); |
|
|
|
|
BigDecimal totalLineCount = new BigDecimal(fileLines.size()); |
|
|
|
|
BigDecimal openRate = new BigDecimal(openLineCount).divide(totalLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); |
|
|
|
|
|
|
|
|
|
BigDecimal openRate = new BigDecimal(openLineNum.size()).divide(new BigDecimal(fileLines.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
|
|
|
|
|
|
//超过阈值,则认为当前文件是开源文件
|
|
|
|
|
if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) { |
|
|
|
@ -215,18 +173,83 @@ public class FileAnalysisTask extends IAnalysisTask { |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//修改保存测试文件信息
|
|
|
|
|
analysisFile.setOpenLineCount(openLineCount) |
|
|
|
|
analysisFile.setOpenLineCount(openLineNum.size()) |
|
|
|
|
.setOpenRate(openRate.floatValue()); |
|
|
|
|
|
|
|
|
|
//组装开源信息
|
|
|
|
|
matchOpenFileInfo.setFilePath(analysisFile.getFileUrl()) |
|
|
|
|
|
|
|
|
|
//保存当前文件的开源信息到mongo库中
|
|
|
|
|
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); |
|
|
|
|
matchOpenFileMongo.setId(IdGenerator.uuid32()) |
|
|
|
|
.setFilePath(analysisFile.getFileUrl()) |
|
|
|
|
.setFileName(analysisFile.getName()) |
|
|
|
|
.setFeatureSimilarity(100.00f) |
|
|
|
|
.setOpenRate(openRate.floatValue()) |
|
|
|
|
.setOpenType(analysisFile.getOpenType()) |
|
|
|
|
.setOpenRate(analysisFile.getOpenType() ? 100.00f : 0.00f) |
|
|
|
|
.setMatchOpenFile(matchOpenFileList); |
|
|
|
|
.setMatchOpenFile(matchOpenFilesRes); |
|
|
|
|
|
|
|
|
|
//保存当前开源信息数据
|
|
|
|
|
mongoTemplate.insert(matchOpenFileInfo); |
|
|
|
|
mongoTemplate.insert(matchOpenFileMongo); |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* 计算当前文件的特征相似度 和 开源率 |
|
|
|
|
* |
|
|
|
|
* @param matchOpenFiles 通过MD5 匹配到的所有开源文件 |
|
|
|
|
* @param sourceFileBaseCoreName 当前文件特征文件的 solr coreName |
|
|
|
|
* @param matchLineRowsNum 所有开源文件匹配到的开源行号列表 |
|
|
|
|
* @return 匹配的开源文件解析后的结果集 |
|
|
|
|
*/ |
|
|
|
|
private List<MatchOpenFile> calculateOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set<Integer> matchLineRowsNum) { |
|
|
|
|
|
|
|
|
|
//匹配的开源文件列表
|
|
|
|
|
List<MatchOpenFile> matchOpenFilesRes = new ArrayList<>(); |
|
|
|
|
|
|
|
|
|
//首先根据文件的MD5查询开源文件的版本ID,和路径信息
|
|
|
|
|
Set<String> openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet()); |
|
|
|
|
Map<String, SolrDocument> md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s); |
|
|
|
|
|
|
|
|
|
//根据版本ID查询版本的详细信息
|
|
|
|
|
//todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versioinTree 后面需要优化
|
|
|
|
|
Set<String> openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet()); |
|
|
|
|
List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds); |
|
|
|
|
Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity())); |
|
|
|
|
|
|
|
|
|
for (SolrDocument openSourceFile : matchOpenFiles) { |
|
|
|
|
|
|
|
|
|
//开源文件md5
|
|
|
|
|
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString(); |
|
|
|
|
|
|
|
|
|
String openFileContent = solrUtils.getOpenFileContentByMd5(openSourceFileMd5); |
|
|
|
|
|
|
|
|
|
//当前文件的开源率
|
|
|
|
|
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openFileContent); |
|
|
|
|
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
|
|
|
|
|
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); |
|
|
|
|
|
|
|
|
|
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); |
|
|
|
|
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); |
|
|
|
|
if (versionInfo == null){ |
|
|
|
|
log.error("找不到开源文件版本信息,versionId:{}", openEntries.get("versionId")); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//组装当前开源文件的开源项目信息
|
|
|
|
|
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); |
|
|
|
|
matchOpenFileInfo.setPId(versionInfo.getProId()) |
|
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
|
.setSourceUrl((String) openEntries.get("fullPath")) |
|
|
|
|
.setFeatureSimilarity(100.00f) |
|
|
|
|
.setOpenRate(openRateAndSaveRowNum.getKey()) |
|
|
|
|
.setVersion(versionInfo.getVersionName()) |
|
|
|
|
.setLicenseType(versionInfo.getLicenseType()) |
|
|
|
|
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()); |
|
|
|
|
matchOpenFilesRes.add(matchOpenFileInfo); |
|
|
|
|
} |
|
|
|
|
return matchOpenFilesRes; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|