From f86530cee10f7f272e59231e62b05957c3b8221e Mon Sep 17 00:00:00 2001 From: liuzongren <15011502566@163.com> Date: Tue, 19 Nov 2024 13:17:05 +0800 Subject: [PATCH] =?UTF-8?q?1.=E4=BC=98=E5=8C=96=E5=88=86=E6=9E=90=E9=80=BB?= =?UTF-8?q?=E8=BE=91=EF=BC=8C=E5=A4=84=E7=90=86=E5=88=86=E6=9E=90=E8=BF=87?= =?UTF-8?q?=E7=A8=8B=E4=B8=AD=E7=A9=BA=E6=8C=87=E9=92=88=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/impl/AnalysisTaskServiceImpl.java | 1 + .../task/FileAnalysisTask.java | 5 +- .../task/LineAnalysisTask.java | 4 + .../task/PorjectAnalysisTask.java | 74 ++++++++----------- .../composeanalysis/util/SolrUtils.java | 1 + 5 files changed, 42 insertions(+), 43 deletions(-) diff --git a/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java b/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java index b5171d9..5be75c9 100644 --- a/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java +++ b/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java @@ -166,6 +166,7 @@ public class AnalysisTaskServiceImpl extends ServiceImpl updateWrapper = new LambdaUpdateWrapper<>(); updateWrapper.eq(AnalysisTask::getId, analysisTask.getId()) @@ -123,19 +125,16 @@ public class PorjectAnalysisTask { //当前文件开源信息存入数据库中 mongoTemplate.insert(projectAssembly); - //更新文件分析的状态 - mongoTemplate.update(FileDataMongoDto.class) - .matching(where("isDirectory").is(false)) - .apply(new Update().set("openType", true) - .set("openRate", 100.00d) - .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())) - .all(); - //保存具体开源文件信息 - VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId()); + VersionTree openProject = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId()); + if (openProject == null) { + throw new BusinessException("查询开源项目信息失败,项目versionId:" + openSourceProject.getVersionId()); + } Query fileQuery = new Query(where("isDirectory").is(false)); List fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class); - saveProjectOpenInfo(openProjectList, fileDataMongoDtos); + Set openFileMd5s = fileDataMongoDtos.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); + //匹配到了整个项目,则将被测件的所有文件设置为开源 + saveProjectOpenInfo(openProject, fileDataMongoDtos,openFileMd5s); return true; } return false; @@ -172,11 +171,10 @@ public class PorjectAnalysisTask { //通过md5去*_SourceFileBase中匹配版本Id Set fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); Map md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s); - if (CollectionUtil.isEmpty(md5VersionObjMap)) { - return; + if (CollectionUtil.isNotEmpty(md5VersionObjMap)) { + //保存结果数据 + saveMatchOpenFileInfo(md5VersionObjMap, data); } - //保存结果数据 - saveMatchOpenFileInfo(md5VersionObjMap, data); } else { //非主流语言的,没有单独的特征库,统一到默认的特征库进行检索 otherLanguageFiles.addAll(data); @@ -192,9 +190,9 @@ public class PorjectAnalysisTask { //暂时忽略字符流md5的匹配,因为大部分都是一样的 Set fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); Map md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s); - if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) { + if (CollUtil.isEmpty(md5VersionIdMap)) { //如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配 - updateFileAnalysisStatus(fileMd5s,true); + updateFileAnalysisStatus(fileMd5s,false); return; } saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles); @@ -226,46 +224,37 @@ public class PorjectAnalysisTask { MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString()); batchInsertCache.add(matchOpenFile); }); - if (CollectionUtils.isNotEmpty(batchInsertCache)) { mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); - //更新文件分析的状态 - updateFileAnalysisStatus(md5VersionIdMap.keySet(),true); + Set openFileIds = batchInsertCache.stream().map(MatchOpenFileMongoDto::getId).collect(Collectors.toSet()); + updateFileAnalysisStatus(openFileIds,true); } } - //匹配到开源项目后,保存各个文件的开源信息 - private void saveProjectOpenInfo(VersionTree versionInfo, List originalFiles) { + //匹配到开源项目后,保存匹配到的各个文件的开源信息 + private void saveProjectOpenInfo(VersionTree versionInfo, List originalFiles,Set matchedFileMd5s) { Map originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1)); - Set matchedMd5s = new HashSet<>(); List batchInsertCache = new ArrayList<>(); List fileInfos = versionInfo.getDirTree(); + //todo 这里会出现重复的md5数据,后续需要处理 + Map md5ToFullPathMap = fileInfos.stream().collect(Collectors.toMap(VersionTreeNode::getSourceFileMd5, VersionTreeNode::getFullPath, (key1, key2) -> key1)); - fileInfos.forEach(versionTreeNodeObj->{ - String openFileMd5 = versionTreeNodeObj.getSourceFileMd5(); - //看是否和被测件的md5匹配 - if (originalMd5ObjMap.keySet().contains(openFileMd5)) { - //匹配的文件只保存一次 - if (!matchedMd5s.contains(openFileMd5)) { - MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5),versionTreeNodeObj.getFullPath()); - batchInsertCache.add(matchOpenFile); - matchedMd5s.add(openFileMd5); - } - } + matchedFileMd5s.forEach(fileMd5 -> { + //匹配的文件只保存一次 + MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(fileMd5), md5ToFullPathMap.get(fileMd5)); + batchInsertCache.add(matchOpenFile); //分批保存,防止单个项目太大,撑爆内存 或 超过 mongodb最大插入数 if (batchInsertCache.size() >= 1000) { mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); batchInsertCache.clear(); } }); - - if (batchInsertCache.size() != 0) { + if (!batchInsertCache.isEmpty()) { mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); } - //更新文件分析的状态 - updateFileAnalysisStatus(matchedMd5s,true); + updateFileAnalysisStatus(matchedFileMd5s,true); } //获取匹配到的开源文件信息 @@ -322,18 +311,19 @@ public class PorjectAnalysisTask { VersionTree openProject = solrUtils.queryVersionTree(queryStr); log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000); //如果存在没有匹配到开源数据的情况,直接退出循环匹配 - if (openProject == null){ + if (openProject == null) { break; } - //保存匹配的开源文件信息 - saveProjectOpenInfo(openProject, projectFiles); - //获取开源项目的所有文件md5集合 List openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList()); //获取被测件和开源项目相同的文件 Set matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet()); + //保存匹配的开源文件信息 + //todo 这里会重复保存数据, 需要优化 + saveProjectOpenInfo(openProject, projectFiles, matchedFiles); + //保存已匹配的文件md5,后续需要统计整体的开源率 matchedFileMd5Set.addAll(matchedFiles); diff --git a/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java b/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java index 5152898..fce77e1 100644 --- a/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java +++ b/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java @@ -188,6 +188,7 @@ public class SolrUtils { if (openSourceContent == null) { log.error("根据开源文件MD5:{}未找到对应的开源文件源码", openSourceFileMd5); + return ""; } return openSourceContent.getFieldValue("sourceContent").toString(); }