Compare commits

..

4 Commits

  1. 36
      pom.xml
  2. 4
      src/main/java/com/keyware/composeanalysis/ComposeAnalysisApplication.java
  3. 24
      src/main/java/com/keyware/composeanalysis/config/GlobalExceptionHandler.java
  4. 7
      src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java
  5. 5
      src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
  6. 5
      src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
  7. 12
      src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
  8. 6
      src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
  9. 76
      src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
  10. 2
      src/main/java/com/keyware/composeanalysis/util/SolrUtils.java
  11. 42
      src/main/resources/application-dev.yaml
  12. 52
      src/main/resources/application.yaml

@ -20,24 +20,24 @@
<artifactId>spring-boot-starter-web</artifactId> <artifactId>spring-boot-starter-web</artifactId>
</dependency> </dependency>
<!-- &lt;!&ndash; nacos 服务的注册发现 &ndash;&gt;--> <!-- nacos 服务的注册发现 -->
<!-- <dependency>--> <dependency>
<!-- <groupId>com.alibaba.cloud</groupId>--> <groupId>com.alibaba.cloud</groupId>
<!-- <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>--> <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
<!-- </dependency>--> </dependency>
<!-- &lt;!&ndash;客户端负载均衡loadbalancer&ndash;&gt;--> <!--客户端负载均衡loadbalancer-->
<!-- <dependency>--> <dependency>
<!-- <groupId>org.springframework.cloud</groupId>--> <groupId>org.springframework.cloud</groupId>
<!-- <artifactId>spring-cloud-starter-loadbalancer</artifactId>--> <artifactId>spring-cloud-starter-loadbalancer</artifactId>
<!-- </dependency>--> </dependency>
<!-- &lt;!&ndash; nacos 配置中心做依赖管理 &ndash;&gt;--> <!-- nacos 配置中心做依赖管理 -->
<!-- <dependency>--> <dependency>
<!-- <groupId>com.alibaba.cloud</groupId>--> <groupId>com.alibaba.cloud</groupId>
<!-- <artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>--> <artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>
<!-- </dependency>--> </dependency>
<!-- mongodb --> <!-- mongodb -->
<dependency> <dependency>

@ -3,10 +3,14 @@ package com.keyware.composeanalysis;
import org.mybatis.spring.annotation.MapperScan; import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
import org.springframework.cloud.context.config.annotation.RefreshScope;
import org.springframework.scheduling.annotation.EnableAsync; import org.springframework.scheduling.annotation.EnableAsync;
@MapperScan("com.keyware.composeanalysis.mapper") @MapperScan("com.keyware.composeanalysis.mapper")
@SpringBootApplication @SpringBootApplication
@EnableDiscoveryClient
@RefreshScope
@EnableAsync @EnableAsync
public class ComposeAnalysisApplication { public class ComposeAnalysisApplication {

@ -13,20 +13,20 @@ import org.springframework.web.bind.annotation.RestControllerAdvice;
@RestControllerAdvice @RestControllerAdvice
public class GlobalExceptionHandler { public class GlobalExceptionHandler {
//全局异常处理 //全局异常处理
@ExceptionHandler(value = Exception.class) @ExceptionHandler(value = Exception.class)
public Result defaultErrorHandler(Exception e) { public Result defaultErrorHandler(Exception e) {
log.error("全局异常信息,ex={}", e.getMessage(), e); log.error("全局异常信息,ex={}",e.getMessage(),e);
return Result.fail(ResultCode.FAIL.getCode(), e.getMessage()); return Result.fail(ResultCode.FAIL.getCode(), e.getMessage());
} }
//自定义异常处理 //自定义异常处理
//业务异常 //业务异常
@ExceptionHandler(value = BusinessException.class) @ExceptionHandler(value = BusinessException.class)
@ResponseBody @ResponseBody
public Result businessExceptionHandler(BusinessException e) { public Result businessExceptionHandler(BusinessException e) {
log.error("业务异常信息", e); log.error("业务异常信息",e);
return Result.fail(e.getCode(), e.getMsg()); return Result.fail(e.getCode(), e.getMsg());
} }
} }

@ -74,6 +74,7 @@ public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, Anal
updateWrapper.eq(AnalysisTask::getId, analysisTask.getId()) updateWrapper.eq(AnalysisTask::getId, analysisTask.getId())
.set(AnalysisTask::getComposeFlag, AnalysisStatusEnum.ANALYSISING.getCode()); .set(AnalysisTask::getComposeFlag, AnalysisStatusEnum.ANALYSISING.getCode());
this.update(null,updateWrapper); this.update(null,updateWrapper);
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
AnalysisLogUtil.insert(mongoTemplate, "【成分分析】开始:" + analysisTask.getFileName()); AnalysisLogUtil.insert(mongoTemplate, "【成分分析】开始:" + analysisTask.getFileName());
@ -88,8 +89,9 @@ public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, Anal
checkProjectIfOpen(mongoTemplate,analysisTask); checkProjectIfOpen(mongoTemplate,analysisTask);
//修改成分分析状态为完成 //修改成分分析状态为完成
updateWrapper.set(AnalysisTask::getComposeFlag, AnalysisStatusEnum.ANALYSIS_DONE.getCode()) updateWrapper.eq(AnalysisTask::getId, analysisTask.getId())
.set(AnalysisTask::getOpenType, analysisTask.getOpenType()); .set(AnalysisTask::getComposeFlag, AnalysisStatusEnum.ANALYSIS_DONE.getCode())
.set(AnalysisTask::getOpenType, analysisTask.getOpenType());
this.update(null,updateWrapper); this.update(null,updateWrapper);
//插入分析日志 //插入分析日志
@ -166,7 +168,6 @@ public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, Anal
//引入解压缩有可能会很慢,这里添加重试机制,最多重试6次,60s //引入解压缩有可能会很慢,这里添加重试机制,最多重试6次,60s
//todo 主程序 后续可以添加压缩标志位
private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) { private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) {
int retryCount = 0; int retryCount = 0;
while (retryCount < 60) { while (retryCount < 60) {

@ -172,6 +172,7 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
return; return;
} }
//保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数 //保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数
Set<String> matchingTraitLineSet = new HashSet<>(); Set<String> matchingTraitLineSet = new HashSet<>();
@ -191,7 +192,6 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
//特征行总数 //特征行总数
int totalFeatureLineCount = traitsFeatureMd5AndFeatureLineNumMap.values().stream().mapToInt(Integer::intValue).sum(); int totalFeatureLineCount = traitsFeatureMd5AndFeatureLineNumMap.values().stream().mapToInt(Integer::intValue).sum();
//计算文件的总体特征相似度 //计算文件的总体特征相似度
BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(totalFeatureLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
@ -346,13 +346,12 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
} }
/** /**
* 获取每个特征代码块包含的特征行数 * 或者特征代码块的md5 当前md5包含的特征行数
* *
* @param codeBlockInfos * @param codeBlockInfos
* @return * @return
*/ */
private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) { private Map<String, Integer> getTraitsFeatureMd5AndFeatureLineNumMap(List<LineModel> codeBlockInfos) {
//按照特征代码块的md5进行分组
Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5)); Map<String, List<LineModel>> traitMd5GroupMap = codeBlockInfos.stream().collect(Collectors.groupingBy(LineModel::getTraitLineMd5));
Map<String, Integer> resultMap = new HashMap<>(); Map<String, Integer> resultMap = new HashMap<>();
for (String traitMd5 : traitMd5GroupMap.keySet()) { for (String traitMd5 : traitMd5GroupMap.keySet()) {

@ -4,7 +4,6 @@ import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.common.exception.BusinessException;
import com.keyware.composeanalysis.constant.FixedValue; import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.RedisConst; import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
@ -83,9 +82,6 @@ public class FileAnalysisTask extends IAnalysisTask {
String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix()); String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
//根据文件名称,获取文件解析器 //根据文件名称,获取文件解析器
Analysis analysis = AnalysisFactory.getAnalysis(fileName); Analysis analysis = AnalysisFactory.getAnalysis(fileName);
if (analysis == null){
throw new BusinessException("获取文件解析器失败,文件名称:" + fileName);
}
//如果文件大小超过3M,则不进行文件级行级特征提取 //如果文件大小超过3M,则不进行文件级行级特征提取
CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0"); CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0");
@ -101,6 +97,7 @@ public class FileAnalysisTask extends IAnalysisTask {
.matching(where("_id").is(analysisFile.getId())) .matching(where("_id").is(analysisFile.getId()))
.replaceWith(analysisFile) .replaceWith(analysisFile)
.findAndReplace(); .findAndReplace();
AnalysisLogUtil.insert(mongoTemplate, "【文件级分析】成功" + fileName); AnalysisLogUtil.insert(mongoTemplate, "【文件级分析】成功" + fileName);
} catch (Exception e) { } catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【文件级】分析失败" + fileName, e); AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【文件级】分析失败" + fileName, e);

@ -106,15 +106,15 @@ public class FunctionAnalysisTask extends IAnalysisTask {
//根据文件的名称获取函数解析器 //根据文件的名称获取函数解析器
Analysis analysis = AnalysisFactory.getAnalysis(fileName); Analysis analysis = AnalysisFactory.getAnalysis(fileName);
if (analysis == null) { if (analysis == null){
throw new BusinessException("获取文件解析器失败,文件名称:" + fileName); throw new BusinessException("获取文件解析器失败,文件名称:"+fileName);
} }
//解析文件 //解析文件
CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath)); CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath));
//根据函数特征去匹配到开源文件 //根据函数特征去匹配到开源文件
SolrDocumentList matchOpenFiles = getFeatureSimilarityFromSolr(featureCoreName, codeFile.getFunctionList()); SolrDocumentList matchOpenFiles = getFeatureSimilarityFromSolr(featureCoreName, codeFile.getFunctionList());
//计算开源率 //计算开源率
doAnalysis(matchOpenFiles, sourceFileBaseCoreName, codeFile); doAnalysis(matchOpenFiles, sourceFileBaseCoreName, codeFile);
@ -238,7 +238,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
//被测件文本内容 //被测件文本内容
String sourcefileContent = FileUtil.readUtf8String(analysisFile.getFileUrl()); String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
//将文本内容解析成行信息,用于后续文件的开源率计算 //将文本内容解析成行信息,用于后续文件的开源率计算
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent); List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
@ -291,9 +291,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath"); String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
if (versionInfo == null){
throw new BusinessException("根据版本ID,未查询到相关的版本信息。versionId:" + openEntries.get("versionId"));
}
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setId(IdGenerator.uuid32()) matchOpenFileInfo.setId(IdGenerator.uuid32())

@ -1,6 +1,5 @@
package com.keyware.composeanalysis.task; package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
@ -142,7 +141,6 @@ public class LineAnalysisTask extends IAnalysisTask {
return; return;
} }
//保存所有匹配的行数信息,方便统计总的匹配行数 //保存所有匹配的行数信息,方便统计总的匹配行数
Set<String> matchedFeatureMd5 = new HashSet<>(); Set<String> matchedFeatureMd5 = new HashSet<>();
@ -258,10 +256,6 @@ public class LineAnalysisTask extends IAnalysisTask {
BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(new BigDecimal(lineFeatureList.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(new BigDecimal(lineFeatureList.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
if (openEntries == null){
log.error("根据开源文件MD5,未查询到相关的开源文件版本信息,md5:{}", openSourceFileMd5);
continue;
}
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
if (versionInfo == null) { if (versionInfo == null) {
log.error("根据版本ID,未查询到相关的版本信息。versionId:{}", openEntries.get("versionId")); log.error("根据版本ID,未查询到相关的版本信息。versionId:{}", openEntries.get("versionId"));

@ -1,12 +1,10 @@
package com.keyware.composeanalysis.task; package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil; import cn.hutool.core.io.FileUtil;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.common.exception.BusinessException;
import com.keyware.composeanalysis.constant.FixedValue; import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.MongoDBConst; import com.keyware.composeanalysis.constant.MongoDBConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
@ -78,7 +76,7 @@ public class PorjectAnalysisTask {
if (!matchedPrject) { if (!matchedPrject) {
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5(); List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
//剩余没有匹配文件,用文件的md5去批量匹配solr库的versionTree //剩余没有匹配文件,用文件的md5去匹配solr库的versionTree
if (CollectionUtils.isNotEmpty(unMatchedFiles)) { if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
matchByFileMd5s(unMatchedFiles); matchByFileMd5s(unMatchedFiles);
} }
@ -86,7 +84,7 @@ public class PorjectAnalysisTask {
//todo 如果整体耗时较长,將matchOpenFileInfo存储到数据库的逻辑修改成异步的 //todo 如果整体耗时较长,將matchOpenFileInfo存储到数据库的逻辑修改成异步的
log.info("项目级分析完成,用时:" + (System.currentTimeMillis() - startTime) / 1000 + "s"); log.info("项目级分析完成,用时:" + (System.currentTimeMillis() - startTime) / 1000 + "s");
} catch (Exception e) { } catch (Exception e) {
AnalysisLogUtil.insert(mongoTemplate, "成分分析失败:" + e.getMessage()); AnalysisLogUtil.insert(mongoTemplate, "成分分析失败:" + e.getStackTrace());
log.error("项目级分析失败,项目名称:" + analysisTask.getFileName(), e); log.error("项目级分析失败,项目名称:" + analysisTask.getFileName(), e);
LambdaUpdateWrapper<AnalysisTask> updateWrapper = new LambdaUpdateWrapper<>(); LambdaUpdateWrapper<AnalysisTask> updateWrapper = new LambdaUpdateWrapper<>();
updateWrapper.eq(AnalysisTask::getId, analysisTask.getId()) updateWrapper.eq(AnalysisTask::getId, analysisTask.getId())
@ -125,16 +123,19 @@ public class PorjectAnalysisTask {
//当前文件开源信息存入数据库中 //当前文件开源信息存入数据库中
mongoTemplate.insert(projectAssembly); mongoTemplate.insert(projectAssembly);
//更新文件分析的状态
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("isDirectory").is(false))
.apply(new Update().set("openType", true)
.set("openRate", 100.00d)
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
.all();
//保存具体开源文件信息 //保存具体开源文件信息
VersionTree openProject = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId()); VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId());
if (openProject == null) {
throw new BusinessException("查询开源项目信息失败,项目versionId:" + openSourceProject.getVersionId());
}
Query fileQuery = new Query(where("isDirectory").is(false)); Query fileQuery = new Query(where("isDirectory").is(false));
List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class); List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class);
Set<String> openFileMd5s = fileDataMongoDtos.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); saveProjectOpenInfo(openProjectList, fileDataMongoDtos);
//匹配到了整个项目,则将被测件的所有文件设置为开源
saveProjectOpenInfo(openProject, fileDataMongoDtos,openFileMd5s);
return true; return true;
} }
return false; return false;
@ -171,10 +172,11 @@ public class PorjectAnalysisTask {
//通过md5去*_SourceFileBase中匹配版本Id //通过md5去*_SourceFileBase中匹配版本Id
Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s); Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s);
if (CollectionUtil.isNotEmpty(md5VersionObjMap)) { if (CollectionUtil.isEmpty(md5VersionObjMap)) {
//保存结果数据 return;
saveMatchOpenFileInfo(md5VersionObjMap, data);
} }
//保存结果数据
saveMatchOpenFileInfo(md5VersionObjMap, data);
} else { } else {
//非主流语言的,没有单独的特征库,统一到默认的特征库进行检索 //非主流语言的,没有单独的特征库,统一到默认的特征库进行检索
otherLanguageFiles.addAll(data); otherLanguageFiles.addAll(data);
@ -190,9 +192,9 @@ public class PorjectAnalysisTask {
//暂时忽略字符流md5的匹配,因为大部分都是一样的 //暂时忽略字符流md5的匹配,因为大部分都是一样的
Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s); Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
if (CollUtil.isEmpty(md5VersionIdMap)) { if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
//如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配 //如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配
updateFileAnalysisStatus(fileMd5s,false); updateFileAnalysisStatus(fileMd5s,true);
return; return;
} }
saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles); saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);
@ -224,37 +226,46 @@ public class PorjectAnalysisTask {
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString()); MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString());
batchInsertCache.add(matchOpenFile); batchInsertCache.add(matchOpenFile);
}); });
if (CollectionUtils.isNotEmpty(batchInsertCache)) { if (CollectionUtils.isNotEmpty(batchInsertCache)) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
Set<String> openFileIds = batchInsertCache.stream().map(MatchOpenFileMongoDto::getId).collect(Collectors.toSet()); //更新文件分析的状态
updateFileAnalysisStatus(openFileIds,true); updateFileAnalysisStatus(md5VersionIdMap.keySet(),true);
} }
} }
//匹配到开源项目后,保存匹配到的各个文件的开源信息 //匹配到开源项目后,保存各个文件的开源信息
private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles,Set<String> matchedFileMd5s) { private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles) {
Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1)); Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
Set<String> matchedMd5s = new HashSet<>();
List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>(); List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
List<VersionTreeNode> fileInfos = versionInfo.getDirTree(); List<VersionTreeNode> fileInfos = versionInfo.getDirTree();
//todo 这里会出现重复的md5数据,后续需要处理
Map<String, String> md5ToFullPathMap = fileInfos.stream().collect(Collectors.toMap(VersionTreeNode::getSourceFileMd5, VersionTreeNode::getFullPath, (key1, key2) -> key1));
matchedFileMd5s.forEach(fileMd5 -> { fileInfos.forEach(versionTreeNodeObj->{
//匹配的文件只保存一次 String openFileMd5 = versionTreeNodeObj.getSourceFileMd5();
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(fileMd5), md5ToFullPathMap.get(fileMd5)); //看是否和被测件的md5匹配
batchInsertCache.add(matchOpenFile); if (originalMd5ObjMap.keySet().contains(openFileMd5)) {
//匹配的文件只保存一次
if (!matchedMd5s.contains(openFileMd5)) {
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5),versionTreeNodeObj.getFullPath());
batchInsertCache.add(matchOpenFile);
matchedMd5s.add(openFileMd5);
}
}
//分批保存,防止单个项目太大,撑爆内存 或 超过 mongodb最大插入数 //分批保存,防止单个项目太大,撑爆内存 或 超过 mongodb最大插入数
if (batchInsertCache.size() >= 1000) { if (batchInsertCache.size() >= 1000) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
batchInsertCache.clear(); batchInsertCache.clear();
} }
}); });
if (!batchInsertCache.isEmpty()) {
if (batchInsertCache.size() != 0) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
} }
//更新文件分析的状态 //更新文件分析的状态
updateFileAnalysisStatus(matchedFileMd5s,true); updateFileAnalysisStatus(matchedMd5s,true);
} }
//获取匹配到的开源文件信息 //获取匹配到的开源文件信息
@ -311,19 +322,18 @@ public class PorjectAnalysisTask {
VersionTree openProject = solrUtils.queryVersionTree(queryStr); VersionTree openProject = solrUtils.queryVersionTree(queryStr);
log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000); log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000);
//如果存在没有匹配到开源数据的情况,直接退出循环匹配 //如果存在没有匹配到开源数据的情况,直接退出循环匹配
if (openProject == null) { if (openProject == null){
break; break;
} }
//保存匹配的开源文件信息
saveProjectOpenInfo(openProject, projectFiles);
//获取开源项目的所有文件md5集合 //获取开源项目的所有文件md5集合
List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList()); List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList());
//获取被测件和开源项目相同的文件 //获取被测件和开源项目相同的文件
Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet()); Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet());
//保存匹配的开源文件信息
//todo 这里会重复保存数据, 需要优化
saveProjectOpenInfo(openProject, projectFiles, matchedFiles);
//保存已匹配的文件md5,后续需要统计整体的开源率 //保存已匹配的文件md5,后续需要统计整体的开源率
matchedFileMd5Set.addAll(matchedFiles); matchedFileMd5Set.addAll(matchedFiles);

@ -7,6 +7,7 @@ import lombok.Data;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
@ -188,7 +189,6 @@ public class SolrUtils {
if (openSourceContent == null) { if (openSourceContent == null) {
log.error("根据开源文件MD5:{}未找到对应的开源文件源码", openSourceFileMd5); log.error("根据开源文件MD5:{}未找到对应的开源文件源码", openSourceFileMd5);
return "";
} }
return openSourceContent.getFieldValue("sourceContent").toString(); return openSourceContent.getFieldValue("sourceContent").toString();
} }

@ -1,42 +0,0 @@
server:
port: 8001
spring:
application:
name: compose-analysis
data:
mongodb:
uri: mongodb://127.0.0.1:27017/KEYSWAN
redis:
host: 172.16.36.7
port: 6379
password: 123456
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://127.0.0.1:3306/keyswan?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8
username: root
password: 123456
hikari:
#最小连接数
minimum-idle: 5
#最大连接数
maximum-pool-size: 30
#最大空闲时间
idle-timeout: 30000
#连接超时时间
connection-timeout: 30000
#自动提交
auto-commit: true
#solr检索库地址
solr:
solrUrl: http://172.16.36.5:8993/solr/
#批量匹配时,返回的匹配数量
row: 5
#被测件上传存储路径
codeResourcePath: D:\codeResourcePath
logging:
level:
com.txlc.dwh.job.common.interceptor.PerformanceInterceptor: debug

@ -1,45 +1,15 @@
server:
port: 8001
spring: spring:
application: application:
name: compose-analysis name: compose-analysis
data: cloud:
mongodb: nacos:
uri: mongodb://127.0.0.1:27017/KEYSWAN discovery:
redis: server-addr: 172.16.36.7:8848
host: 127.0.0.1 namespace: 85e3c56e-90d3-4192-9913-4bdfb16b3db1
port: 6379 config:
password: 123456 server-addr: 172.16.36.7:8848
namespace: 85e3c56e-90d3-4192-9913-4bdfb16b3db1
datasource: file-extension: yaml
driver-class-name: com.mysql.cj.jdbc.Driver config:
url: jdbc:mysql://127.0.0.1:3306/keyswan?useUnicode=true&characterEncoding=utf-8&useSSL=true&serverTimezone=GMT%2B8 import: nacos:compose-analysis-dev.yaml
username: root
password: 123456
hikari:
#最小连接数
minimum-idle: 5
#最大连接数
maximum-pool-size: 30
#最大空闲时间
idle-timeout: 30000
#连接超时时间
connection-timeout: 30000
#自动提交
auto-commit: true
#solr检索库地址
solr:
solrUrl: http://172.16.36.7:8993/solr/
#批量匹配时,返回的匹配数量
row: 5
#被测件上传存储路径
codeResourcePath: D:\codeResourcePath
logging:
level:
com.txlc.dwh.job.common.interceptor.PerformanceInterceptor: debug

Loading…
Cancel
Save