Compare commits

...

4 Commits

11 files changed:

  1. pom.xml (36 changes)
  2. src/main/java/com/keyware/composeanalysis/ComposeAnalysisApplication.java (4 changes)
  3. src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java (13 changes)
  4. src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java (9 changes)
  5. src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java (5 changes)
  6. src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java (4 changes)
  7. src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java (4 changes)
  8. src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java (75 changes)
  9. src/main/java/com/keyware/composeanalysis/util/SolrUtils.java (2 changes)
  10. src/main/resources/application-dev.yaml (42 changes)
  11. src/main/resources/application.yaml (52 changes)

pom.xml

@@ -20,24 +20,24 @@
         <artifactId>spring-boot-starter-web</artifactId>
     </dependency>
-    <!-- nacos service registration and discovery -->
-    <dependency>
-        <groupId>com.alibaba.cloud</groupId>
-        <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
-    </dependency>
-    <!-- client-side load balancing (loadbalancer) -->
-    <dependency>
-        <groupId>org.springframework.cloud</groupId>
-        <artifactId>spring-cloud-starter-loadbalancer</artifactId>
-    </dependency>
-    <!-- nacos config center dependency -->
-    <dependency>
-        <groupId>com.alibaba.cloud</groupId>
-        <artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>
-    </dependency>
+    <!-- &lt;!&ndash; nacos service registration and discovery &ndash;&gt;-->
+    <!-- <dependency>-->
+    <!--     <groupId>com.alibaba.cloud</groupId>-->
+    <!--     <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>-->
+    <!-- </dependency>-->
+    <!-- &lt;!&ndash; client-side load balancing (loadbalancer) &ndash;&gt;-->
+    <!-- <dependency>-->
+    <!--     <groupId>org.springframework.cloud</groupId>-->
+    <!--     <artifactId>spring-cloud-starter-loadbalancer</artifactId>-->
+    <!-- </dependency>-->
+    <!-- &lt;!&ndash; nacos config center dependency &ndash;&gt;-->
+    <!-- <dependency>-->
+    <!--     <groupId>com.alibaba.cloud</groupId>-->
+    <!--     <artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>-->
+    <!-- </dependency>-->
     <!-- mongodb -->
     <dependency>

src/main/java/com/keyware/composeanalysis/ComposeAnalysisApplication.java

@@ -3,14 +3,10 @@ package com.keyware.composeanalysis;
 import org.mybatis.spring.annotation.MapperScan;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
-import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
-import org.springframework.cloud.context.config.annotation.RefreshScope;
 import org.springframework.scheduling.annotation.EnableAsync;
 @MapperScan("com.keyware.composeanalysis.mapper")
 @SpringBootApplication
-@EnableDiscoveryClient
-@RefreshScope
 @EnableAsync
 public class ComposeAnalysisApplication {

src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java

@@ -2,6 +2,7 @@ package com.keyware.composeanalysis.service.impl;
 import cn.hutool.core.date.DateUnit;
 import cn.hutool.core.date.DateUtil;
+import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
 import com.keyware.common.constant.RedisConst;
 import com.keyware.common.constant.enums.AnalysisStatusEnum;

@@ -69,8 +70,10 @@ public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, Anal
 retryGetDecompressionFlag(analysisTask);
 //Before starting the analysis, set the composition analysis status to IN PROGRESS
-analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSISING.getCode());
-this.updateById(analysisTask);
+LambdaUpdateWrapper<AnalysisTask> updateWrapper = new LambdaUpdateWrapper<>();
+updateWrapper.eq(AnalysisTask::getId, analysisTask.getId())
+        .set(AnalysisTask::getComposeFlag, AnalysisStatusEnum.ANALYSISING.getCode());
+this.update(null, updateWrapper);
 MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
 AnalysisLogUtil.insert(mongoTemplate, "[Composition analysis] started: " + analysisTask.getFileName());

@@ -85,8 +88,9 @@ public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, Anal
 checkProjectIfOpen(mongoTemplate, analysisTask);
 //Set the composition analysis status to DONE
-analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
-this.updateById(analysisTask);
+updateWrapper.set(AnalysisTask::getComposeFlag, AnalysisStatusEnum.ANALYSIS_DONE.getCode())
+        .set(AnalysisTask::getOpenType, analysisTask.getOpenType());
+this.update(null, updateWrapper);
 //Insert the analysis log
 AnalysisLogUtil.insert(mongoTemplate, "[Composition analysis] completed, took: " + DateUtil.between(analysisTask.getAnalysisStartTime(), DateUtil.date(), DateUnit.SECOND) + " seconds");

@@ -162,6 +166,7 @@ public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, Anal
 //Decompression can be slow, so a retry mechanism is added here: retry at most 6 times, 60s
+//todo main program: a compression flag bit could be added later
 private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) {
     int retryCount = 0;
     while (retryCount < 60) {
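
The switch from updateById to a LambdaUpdateWrapper means only the columns named in set() are written back, so a stale in-memory entity can no longer overwrite fields that another worker changed concurrently. A condensed restatement of the pattern the diff adopts (MyBatis-Plus, reusing the entity and enum from the diff; the method wrapper is illustrative):

    import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;

    // Inside a ServiceImpl<AnalyzeTaskMapper, AnalysisTask>:
    void markInProgress(AnalysisTask analysisTask) {
        LambdaUpdateWrapper<AnalysisTask> updateWrapper = new LambdaUpdateWrapper<>();
        updateWrapper.eq(AnalysisTask::getId, analysisTask.getId())              // WHERE id = ?
                .set(AnalysisTask::getComposeFlag,
                     AnalysisStatusEnum.ANALYSISING.getCode());                  // SET compose_flag = ?
        // Passing a null entity updates only the set() columns;
        // updateById(analysisTask) would write every non-null field of the entity.
        this.update(null, updateWrapper);
    }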

src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java

@@ -172,7 +172,6 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
     return;
 }
 //Save the MD5 info of all matched feature code blocks, to make counting the total matched lines easier
 Set<String> matchingTraitLineSet = new HashSet<>();

@@ -190,8 +189,11 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
     matchCodeBlockLineCount += traitsFeatureMd5AndFeatureLineNumMap.get(matchFeatureMd5);
 }
+//Total number of feature lines
+int totalFeatureLineCount = traitsFeatureMd5AndFeatureLineNumMap.values().stream().mapToInt(Integer::intValue).sum();
 //Calculate the file's overall feature similarity
-BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
 //Calculate the file's overall open source rate
 BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

@@ -344,12 +346,13 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
 }
 /**
- * Or the feature code block md5; the number of feature lines the current md5 contains
+ * Get the number of feature lines contained in each feature code block
  *
  * @param codeBlockInfos
  * @return
  */
 private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) {
+    //Group by the md5 of the feature code block
     Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5));
     Map<String, Integer> resultMap = new HashMap<>();
     for (String traitMd5 : traitMd5GroupMap.keySet()) {
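
The denominator change makes the similarity score relative to the feature lines that could have matched, rather than to the file's total code line count. A small worked sketch of the arithmetic (all values illustrative):

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    public class FeatureSimilarityDemo {
        public static void main(String[] args) {
            int matchCodeBlockLineCount = 37; // matched feature lines
            int totalFeatureLineCount = 120;  // sum over all feature code blocks
            // 37 / 120 = 0.3083 -> 30.83% similarity; dividing by the file's
            // total code line count (say 400) would have reported only 9.25%.
            BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount)
                    .divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP)
                    .multiply(new BigDecimal(100))
                    .setScale(2);
            System.out.println(featureSimilarity); // prints 30.83
        }
    }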

src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java

@@ -4,6 +4,7 @@ import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.io.FileUtil;
 import cn.hutool.core.lang.Pair;
 import com.keyware.common.constant.enums.AnalysisStatusEnum;
+import com.keyware.common.exception.BusinessException;
 import com.keyware.composeanalysis.constant.FixedValue;
 import com.keyware.composeanalysis.constant.RedisConst;
 import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;

@@ -82,6 +83,9 @@ public class FileAnalysisTask extends IAnalysisTask {
 String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
 //Get the file parser based on the file name
 Analysis analysis = AnalysisFactory.getAnalysis(fileName);
+if (analysis == null) {
+    throw new BusinessException("Failed to get the file parser, file name: " + fileName);
+}
 //If the file is larger than 3M, skip file-level and line-level feature extraction
 CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0");

@@ -97,7 +101,6 @@ public class FileAnalysisTask extends IAnalysisTask {
     .matching(where("_id").is(analysisFile.getId()))
     .replaceWith(analysisFile)
     .findAndReplace();
 AnalysisLogUtil.insert(mongoTemplate, "[File-level analysis] succeeded: " + fileName);
 } catch (Exception e) {
     AnalysisLogUtil.insertErrorInfo(mongoTemplate, "[File-level] analysis failed: " + fileName, e);

src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java

@@ -291,7 +291,9 @@ public class FunctionAnalysisTask extends IAnalysisTask {
 SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
 VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
 String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
+if (versionInfo == null) {
+    throw new BusinessException("No version info found for the version ID. versionId: " + openEntries.get("versionId"));
+}
 //Assemble the open source project info for the current open source file
 MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
 matchOpenFileInfo.setId(IdGenerator.uuid32())

src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java

@@ -258,6 +258,10 @@ public class LineAnalysisTask extends IAnalysisTask {
 BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(new BigDecimal(lineFeatureList.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
 SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
+if (openEntries == null) {
+    log.error("No open source file version info found for the open source file MD5, md5: {}", openSourceFileMd5);
+    continue;
+}
 VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
 if (versionInfo == null) {
     log.error("No version info found for the version ID. versionId: {}", openEntries.get("versionId"));

src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java

@@ -1,9 +1,12 @@
 package com.keyware.composeanalysis.task;
+import cn.hutool.core.collection.CollUtil;
 import cn.hutool.core.collection.CollectionUtil;
 import cn.hutool.core.io.FileUtil;
+import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
 import com.google.common.collect.Sets;
 import com.keyware.common.constant.enums.AnalysisStatusEnum;
+import com.keyware.common.exception.BusinessException;
 import com.keyware.composeanalysis.constant.FixedValue;
 import com.keyware.composeanalysis.constant.MongoDBConst;
 import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;

@@ -75,7 +78,7 @@ public class PorjectAnalysisTask {
 if (!matchedPrject) {
     List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
-    //For the remaining unmatched files, use the file md5 to match the versionTree in the solr library
+    //For the remaining unmatched files, use the file md5 to batch-match the versionTree in the solr library
     if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
         matchByFileMd5s(unMatchedFiles);
     }

@@ -83,10 +86,12 @@ public class PorjectAnalysisTask {
 //todo if the overall run takes long, make the logic that stores matchOpenFileInfo to the database asynchronous
 log.info("Project-level analysis completed, took: " + (System.currentTimeMillis() - startTime) / 1000 + "s");
 } catch (Exception e) {
-    AnalysisLogUtil.insert(mongoTemplate, "Composition analysis failed: " + e.getStackTrace());
+    AnalysisLogUtil.insert(mongoTemplate, "Composition analysis failed: " + e.getMessage());
     log.error("Project-level analysis failed, project name: " + analysisTask.getFileName(), e);
-    analysisTask.setAnalysisStatus(AnalysisStatusEnum.FAIL_ANALYSIS.getCode());
-    analysisService.updateById(analysisTask);
+    LambdaUpdateWrapper<AnalysisTask> updateWrapper = new LambdaUpdateWrapper<>();
+    updateWrapper.eq(AnalysisTask::getId, analysisTask.getId())
+            .set(AnalysisTask::getAnalysisStatus, AnalysisStatusEnum.FAIL_ANALYSIS.getCode());
+    analysisService.update(null, updateWrapper);
 }
 }

@@ -120,19 +125,16 @@ public class PorjectAnalysisTask {
 //Store the current file's open source info in the database
 mongoTemplate.insert(projectAssembly);
-//Update the file analysis status
-mongoTemplate.update(FileDataMongoDto.class)
-        .matching(where("isDirectory").is(false))
-        .apply(new Update().set("openType", true)
-                .set("openRate", 100.00d)
-                .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
-        .all();
 //Save the detailed open source file info
-VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId());
+VersionTree openProject = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId());
+if (openProject == null) {
+    throw new BusinessException("Failed to query the open source project info, project versionId: " + openSourceProject.getVersionId());
+}
 Query fileQuery = new Query(where("isDirectory").is(false));
 List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class);
-saveProjectOpenInfo(openProjectList, fileDataMongoDtos);
+Set<String> openFileMd5s = fileDataMongoDtos.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
+//The whole project matched, so mark all files of the tested artifact as open source
+saveProjectOpenInfo(openProject, fileDataMongoDtos, openFileMd5s);
 return true;
 }
 return false;

@@ -169,11 +171,10 @@ public class PorjectAnalysisTask {
 //Match the version Id in *_SourceFileBase by md5
 Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
 Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s);
-if (CollectionUtil.isEmpty(md5VersionObjMap)) {
-    return;
-}
+if (CollectionUtil.isNotEmpty(md5VersionObjMap)) {
     //Save the result data
     saveMatchOpenFileInfo(md5VersionObjMap, data);
+}
 } else {
     //Non-mainstream languages have no dedicated feature library; they are all searched against the default feature library
     otherLanguageFiles.addAll(data);

@@ -189,9 +190,9 @@ public class PorjectAnalysisTask {
 //Skip character-stream md5 matching for now, since most of them are identical
 Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
 Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
-if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
+if (CollUtil.isEmpty(md5VersionIdMap)) {
     //If nothing matched, mark the file analysis status as done right away: files outside the 32 supported languages cannot be parsed, so if the source file MD5 finds no match there is no match at all, and no further matching is needed
-    updateFileAnalysisStatus(fileMd5s, true);
+    updateFileAnalysisStatus(fileMd5s, false);
     return;
 }
 saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);

@@ -223,46 +224,37 @@ public class PorjectAnalysisTask {
     MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString());
     batchInsertCache.add(matchOpenFile);
 });
 if (CollectionUtils.isNotEmpty(batchInsertCache)) {
     mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
-    //Update the file analysis status
-    updateFileAnalysisStatus(md5VersionIdMap.keySet(), true);
+    Set<String> openFileIds = batchInsertCache.stream().map(MatchOpenFileMongoDto::getId).collect(Collectors.toSet());
+    updateFileAnalysisStatus(openFileIds, true);
 }
 }

-//After matching an open source project, save each file's open source info
-private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles) {
+//After matching an open source project, save the open source info of each matched file
+private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles, Set<String> matchedFileMd5s) {
 Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
-Set<String> matchedMd5s = new HashSet<>();
 List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
 List<VersionTreeNode> fileInfos = versionInfo.getDirTree();
+//todo duplicate md5 data can appear here; needs handling later
+Map<String, String> md5ToFullPathMap = fileInfos.stream().collect(Collectors.toMap(VersionTreeNode::getSourceFileMd5, VersionTreeNode::getFullPath, (key1, key2) -> key1));
-fileInfos.forEach(versionTreeNodeObj -> {
-    String openFileMd5 = versionTreeNodeObj.getSourceFileMd5();
-    //Check whether it matches an md5 of the tested artifact
-    if (originalMd5ObjMap.keySet().contains(openFileMd5)) {
+matchedFileMd5s.forEach(fileMd5 -> {
     //A matched file is saved only once
-    if (!matchedMd5s.contains(openFileMd5)) {
-        MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5), versionTreeNodeObj.getFullPath());
+    MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(fileMd5), md5ToFullPathMap.get(fileMd5));
     batchInsertCache.add(matchOpenFile);
-        matchedMd5s.add(openFileMd5);
-    }
-    }
     //Save in batches, to keep a single oversized project from exhausting memory or exceeding mongodb's maximum insert count
     if (batchInsertCache.size() >= 1000) {
         mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
         batchInsertCache.clear();
     }
 });
-if (batchInsertCache.size() != 0) {
+if (!batchInsertCache.isEmpty()) {
     mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
 }
 //Update the file analysis status
-updateFileAnalysisStatus(matchedMd5s, true);
+updateFileAnalysisStatus(matchedFileMd5s, true);
 }

 //Get the matched open source file info
@@ -323,14 +315,15 @@ public class PorjectAnalysisTask {
     break;
 }
-//Save the matched open source file info
-saveProjectOpenInfo(openProject, projectFiles);
 //Get the md5 set of all files in the open source project
 List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList());
 //Get the files the tested artifact and the open source project have in common
 Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet());
+//Save the matched open source file info
+//todo data can be saved repeatedly here; needs optimization
+saveProjectOpenInfo(openProject, projectFiles, matchedFiles);
 //Save the matched file md5s; the overall open source rate is computed from them later
 matchedFileMd5Set.addAll(matchedFiles);
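
The reworked saveProjectOpenInfo keeps the batch-and-flush insert pattern: buffer documents, flush every 1000 to stay within memory and mongodb's insert limits, then flush the remainder. A generic sketch of that pattern (MongoTemplate is Spring Data's; the BatchInserter helper is hypothetical, not part of the diff):

    import java.util.ArrayList;
    import java.util.List;
    import org.springframework.data.mongodb.core.MongoTemplate;

    public class BatchInserter<T> {
        private static final int BATCH_SIZE = 1000;
        private final MongoTemplate mongoTemplate;
        private final Class<T> type;
        private final List<T> cache = new ArrayList<>();

        public BatchInserter(MongoTemplate mongoTemplate, Class<T> type) {
            this.mongoTemplate = mongoTemplate;
            this.type = type;
        }

        // Buffer one document; flush when the cache reaches the batch size
        public void add(T doc) {
            cache.add(doc);
            if (cache.size() >= BATCH_SIZE) {
                flush();
            }
        }

        // Insert whatever is buffered and clear the cache
        public void flush() {
            if (!cache.isEmpty()) {
                mongoTemplate.insert(cache, type);
                cache.clear();
            }
        }
    }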

src/main/java/com/keyware/composeanalysis/util/SolrUtils.java

@@ -7,7 +7,6 @@ import lombok.Data;
 import lombok.extern.log4j.Log4j2;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;

@@ -189,6 +188,7 @@ public class SolrUtils {
 if (openSourceContent == null) {
     log.error("No source code found for open source file MD5: {}", openSourceFileMd5);
+    return "";
 }
 return openSourceContent.getFieldValue("sourceContent").toString();
 }

src/main/resources/application-dev.yaml

@@ -0,0 +1,42 @@
+server:
+  port: 8001
+spring:
+  application:
+    name: compose-analysis
+  data:
+    mongodb:
+      uri: mongodb://127.0.0.1:27017/KEYSWAN
+  redis:
+    host: 172.16.36.7
+    port: 6379
+    password: 123456
+  datasource:
+    driver-class-name: com.mysql.cj.jdbc.Driver
+    url: jdbc:mysql://127.0.0.1:3306/keyswan?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
+    username: root
+    password: 123456
+    hikari:
+      #Minimum number of connections
+      minimum-idle: 5
+      #Maximum number of connections
+      maximum-pool-size: 30
+      #Maximum idle time
+      idle-timeout: 30000
+      #Connection timeout
+      connection-timeout: 30000
+      #Auto-commit
+      auto-commit: true
+#Solr search library address
+solr:
+  solrUrl: http://172.16.36.5:8993/solr/
+  #Number of matches returned during batch matching
+  row: 5
+#Storage path for uploaded artifacts under test
+codeResourcePath: D:\codeResourcePath
+logging:
+  level:
+    com.txlc.dwh.job.common.interceptor.PerformanceInterceptor: debug

src/main/resources/application.yaml

@@ -1,15 +1,45 @@
+server:
+  port: 8001
 spring:
   application:
     name: compose-analysis
-  cloud:
-    nacos:
-      discovery:
-        server-addr: 127.0.0.1:8848
-        namespace: 4ce70f33-8b88-4931-a88c-2b68e7259bd7
-      config:
-        server-addr: 127.0.0.1:8848
-        namespace: 4ce70f33-8b88-4931-a88c-2b68e7259bd7
-        file-extension: yaml
-  config:
-    import: nacos:compose-analysis-dev.yaml
+  data:
+    mongodb:
+      uri: mongodb://127.0.0.1:27017/KEYSWAN
+  redis:
+    host: 127.0.0.1
+    port: 6379
+    password: 123456
+  datasource:
+    driver-class-name: com.mysql.cj.jdbc.Driver
+    url: jdbc:mysql://127.0.0.1:3306/keyswan?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
+    username: root
+    password: 123456
+    hikari:
+      #Minimum number of connections
+      minimum-idle: 5
+      #Maximum number of connections
+      maximum-pool-size: 30
+      #Maximum idle time
+      idle-timeout: 30000
+      #Connection timeout
+      connection-timeout: 30000
+      #Auto-commit
+      auto-commit: true
+#Solr search library address
+solr:
+  solrUrl: http://172.16.36.7:8993/solr/
+  #Number of matches returned during batch matching
+  row: 5
+#Storage path for uploaded artifacts under test
+codeResourcePath: D:\codeResourcePath
+logging:
+  level:
+    com.txlc.dwh.job.common.interceptor.PerformanceInterceptor: debug
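
With nacos removed, the custom solr block above is a plain local property group. A minimal sketch of how it could be bound with Spring Boot's @ConfigurationProperties (this holder class is an assumption for illustration; the project's real consumer of these properties is not shown in the diff):

    import org.springframework.boot.context.properties.ConfigurationProperties;
    import org.springframework.stereotype.Component;

    // Hypothetical binding class for the "solr" block in application.yaml
    @Component
    @ConfigurationProperties(prefix = "solr")
    public class SolrProperties {
        private String solrUrl; // solr.solrUrl: base address of the Solr cores
        private Integer row;    // solr.row: number of matches returned per batch query

        public String getSolrUrl() { return solrUrl; }
        public void setSolrUrl(String solrUrl) { this.solrUrl = solrUrl; }
        public Integer getRow() { return row; }
        public void setRow(Integer row) { this.row = row; }
    }

The top-level codeResourcePath would be injected separately, for example with @Value("${codeResourcePath}").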
