|
|
|
@ -1,9 +1,12 @@ |
|
|
|
|
package com.keyware.composeanalysis.task; |
|
|
|
|
|
|
|
|
|
import cn.hutool.core.collection.CollUtil; |
|
|
|
|
import cn.hutool.core.collection.CollectionUtil; |
|
|
|
|
import cn.hutool.core.io.FileUtil; |
|
|
|
|
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; |
|
|
|
|
import com.google.common.collect.Sets; |
|
|
|
|
import com.keyware.common.constant.enums.AnalysisStatusEnum; |
|
|
|
|
import com.keyware.common.exception.BusinessException; |
|
|
|
|
import com.keyware.composeanalysis.constant.FixedValue; |
|
|
|
|
import com.keyware.composeanalysis.constant.MongoDBConst; |
|
|
|
|
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
|
|
|
@ -75,7 +78,7 @@ public class PorjectAnalysisTask { |
|
|
|
|
if (!matchedPrject) { |
|
|
|
|
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5(); |
|
|
|
|
|
|
|
|
|
// For the files that are still unmatched, look up each file's MD5 in the Solr versionTree
|
|
|
|
|
// For the files that are still unmatched, batch-match their MD5s against the Solr versionTree
|
|
|
|
|
if (CollectionUtils.isNotEmpty(unMatchedFiles)) { |
|
|
|
|
matchByFileMd5s(unMatchedFiles); |
|
|
|
|
} |
|
|
|
@ -83,10 +86,12 @@ public class PorjectAnalysisTask { |
|
|
|
|
// TODO: if the overall run takes too long, make the persistence of matchOpenFileInfo to the database asynchronous
|
|
|
|
|
log.info("项目级分析完成,用时:" + (System.currentTimeMillis() - startTime) / 1000 + "s"); |
|
|
|
|
} catch (Exception e) { |
|
|
|
|
AnalysisLogUtil.insert(mongoTemplate, "成分分析失败:" + e.getStackTrace()); |
|
|
|
|
AnalysisLogUtil.insert(mongoTemplate, "成分分析失败:" + e.getMessage()); |
|
|
|
|
log.error("项目级分析失败,项目名称:" + analysisTask.getFileName(), e); |
|
|
|
|
analysisTask.setAnalysisStatus(AnalysisStatusEnum.FAIL_ANALYSIS.getCode()); |
|
|
|
|
analysisService.updateById(analysisTask); |
|
|
|
|
LambdaUpdateWrapper<AnalysisTask> updateWrapper = new LambdaUpdateWrapper<>(); |
|
|
|
|
updateWrapper.eq(AnalysisTask::getId, analysisTask.getId()) |
|
|
|
|
.set(AnalysisTask::getAnalysisStatus, AnalysisStatusEnum.FAIL_ANALYSIS.getCode()); |
|
|
|
|
analysisService.update(null,updateWrapper); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -120,19 +125,16 @@ public class PorjectAnalysisTask { |
|
|
|
|
//当前文件开源信息存入数据库中
|
|
|
|
|
mongoTemplate.insert(projectAssembly); |
|
|
|
|
|
|
|
|
|
//更新文件分析的状态
|
|
|
|
|
mongoTemplate.update(FileDataMongoDto.class) |
|
|
|
|
.matching(where("isDirectory").is(false)) |
|
|
|
|
.apply(new Update().set("openType", true) |
|
|
|
|
.set("openRate", 100.00d) |
|
|
|
|
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())) |
|
|
|
|
.all(); |
|
|
|
|
|
|
|
|
|
//保存具体开源文件信息
|
|
|
|
|
VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId()); |
|
|
|
|
VersionTree openProject = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId()); |
|
|
|
|
if (openProject == null) { |
|
|
|
|
throw new BusinessException("查询开源项目信息失败,项目versionId:" + openSourceProject.getVersionId()); |
|
|
|
|
} |
|
|
|
|
Query fileQuery = new Query(where("isDirectory").is(false)); |
|
|
|
|
List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class); |
|
|
|
|
saveProjectOpenInfo(openProjectList, fileDataMongoDtos); |
|
|
|
|
Set<String> openFileMd5s = fileDataMongoDtos.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); |
|
|
|
|
//匹配到了整个项目,则将被测件的所有文件设置为开源
|
|
|
|
|
saveProjectOpenInfo(openProject, fileDataMongoDtos,openFileMd5s); |
|
|
|
|
return true; |
|
|
|
|
} |
|
|
|
|
return false; |
|
|
|
@ -169,11 +171,10 @@ public class PorjectAnalysisTask { |
|
|
|
|
//通过md5去*_SourceFileBase中匹配版本Id
|
|
|
|
|
Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); |
|
|
|
|
Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s); |
|
|
|
|
if (CollectionUtil.isEmpty(md5VersionObjMap)) { |
|
|
|
|
return; |
|
|
|
|
if (CollectionUtil.isNotEmpty(md5VersionObjMap)) { |
|
|
|
|
//保存结果数据
|
|
|
|
|
saveMatchOpenFileInfo(md5VersionObjMap, data); |
|
|
|
|
} |
|
|
|
|
//保存结果数据
|
|
|
|
|
saveMatchOpenFileInfo(md5VersionObjMap, data); |
|
|
|
|
} else { |
|
|
|
|
//非主流语言的,没有单独的特征库,统一到默认的特征库进行检索
|
|
|
|
|
otherLanguageFiles.addAll(data); |
|
|
|
@ -189,9 +190,9 @@ public class PorjectAnalysisTask { |
|
|
|
|
//暂时忽略字符流md5的匹配,因为大部分都是一样的
|
|
|
|
|
Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); |
|
|
|
|
Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s); |
|
|
|
|
if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) { |
|
|
|
|
if (CollUtil.isEmpty(md5VersionIdMap)) { |
|
|
|
|
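// Nothing matched: mark the analysis of these files as finished right away. Files outside the 32 supported languages cannot be parsed, so when the source-file MD5 lookup misses there is no further matching step to try.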
|
|
|
|
|
updateFileAnalysisStatus(fileMd5s,true); |
|
|
|
|
updateFileAnalysisStatus(fileMd5s,false); |
|
|
|
|
return; |
|
|
|
|
} |
|
|
|
|
saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles); |
|
|
|
@ -223,46 +224,37 @@ public class PorjectAnalysisTask { |
|
|
|
|
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString()); |
|
|
|
|
batchInsertCache.add(matchOpenFile); |
|
|
|
|
}); |
|
|
|
|
|
|
|
|
|
if (CollectionUtils.isNotEmpty(batchInsertCache)) { |
|
|
|
|
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); |
|
|
|
|
//更新文件分析的状态
|
|
|
|
|
updateFileAnalysisStatus(md5VersionIdMap.keySet(),true); |
|
|
|
|
Set<String> openFileIds = batchInsertCache.stream().map(MatchOpenFileMongoDto::getId).collect(Collectors.toSet()); |
|
|
|
|
updateFileAnalysisStatus(openFileIds,true); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//匹配到开源项目后,保存各个文件的开源信息
|
|
|
|
|
private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles) { |
|
|
|
|
//匹配到开源项目后,保存匹配到的各个文件的开源信息
|
|
|
|
|
private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles,Set<String> matchedFileMd5s) { |
|
|
|
|
Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1)); |
|
|
|
|
Set<String> matchedMd5s = new HashSet<>(); |
|
|
|
|
List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>(); |
|
|
|
|
List<VersionTreeNode> fileInfos = versionInfo.getDirTree(); |
|
|
|
|
//todo 这里会出现重复的md5数据,后续需要处理
|
|
|
|
|
Map<String, String> md5ToFullPathMap = fileInfos.stream().collect(Collectors.toMap(VersionTreeNode::getSourceFileMd5, VersionTreeNode::getFullPath, (key1, key2) -> key1)); |
|
|
|
|
|
|
|
|
|
fileInfos.forEach(versionTreeNodeObj->{ |
|
|
|
|
String openFileMd5 = versionTreeNodeObj.getSourceFileMd5(); |
|
|
|
|
//看是否和被测件的md5匹配
|
|
|
|
|
if (originalMd5ObjMap.keySet().contains(openFileMd5)) { |
|
|
|
|
//匹配的文件只保存一次
|
|
|
|
|
if (!matchedMd5s.contains(openFileMd5)) { |
|
|
|
|
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5),versionTreeNodeObj.getFullPath()); |
|
|
|
|
batchInsertCache.add(matchOpenFile); |
|
|
|
|
matchedMd5s.add(openFileMd5); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
matchedFileMd5s.forEach(fileMd5 -> { |
|
|
|
|
//匹配的文件只保存一次
|
|
|
|
|
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(fileMd5), md5ToFullPathMap.get(fileMd5)); |
|
|
|
|
batchInsertCache.add(matchOpenFile); |
|
|
|
|
//分批保存,防止单个项目太大,撑爆内存 或 超过 mongodb最大插入数
|
|
|
|
|
if (batchInsertCache.size() >= 1000) { |
|
|
|
|
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); |
|
|
|
|
batchInsertCache.clear(); |
|
|
|
|
} |
|
|
|
|
}); |
|
|
|
|
|
|
|
|
|
if (batchInsertCache.size() != 0) { |
|
|
|
|
if (!batchInsertCache.isEmpty()) { |
|
|
|
|
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//更新文件分析的状态
|
|
|
|
|
updateFileAnalysisStatus(matchedMd5s,true); |
|
|
|
|
updateFileAnalysisStatus(matchedFileMd5s,true); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//获取匹配到的开源文件信息
|
|
|
|
@ -319,18 +311,19 @@ public class PorjectAnalysisTask { |
|
|
|
|
VersionTree openProject = solrUtils.queryVersionTree(queryStr); |
|
|
|
|
log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000); |
|
|
|
|
//如果存在没有匹配到开源数据的情况,直接退出循环匹配
|
|
|
|
|
if (openProject == null){ |
|
|
|
|
if (openProject == null) { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//保存匹配的开源文件信息
|
|
|
|
|
saveProjectOpenInfo(openProject, projectFiles); |
|
|
|
|
|
|
|
|
|
//获取开源项目的所有文件md5集合
|
|
|
|
|
List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList()); |
|
|
|
|
//获取被测件和开源项目相同的文件
|
|
|
|
|
Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet()); |
|
|
|
|
|
|
|
|
|
//保存匹配的开源文件信息
|
|
|
|
|
// TODO: this saves duplicate records and needs to be optimized
|
|
|
|
|
saveProjectOpenInfo(openProject, projectFiles, matchedFiles); |
|
|
|
|
|
|
|
|
|
//保存已匹配的文件md5,后续需要统计整体的开源率
|
|
|
|
|
matchedFileMd5Set.addAll(matchedFiles); |
|
|
|
|
|
|
|
|
|