diff --git a/pom.xml b/pom.xml
index df357c1..f56cef2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -14,10 +14,6 @@
compose-analysis
compose-analysis 源码溯源服务
-
- 17
-
-
org.springframework.boot
@@ -30,6 +26,13 @@
spring-cloud-starter-alibaba-nacos-discovery
+
+
+ org.springframework.cloud
+ spring-cloud-starter-loadbalancer
+
+
+
com.alibaba.cloud
@@ -86,35 +89,6 @@
-
-
- keyware-repos
- KeyWare Repository
- http://218.30.67.85:19201/nexus/content/groups/public/
-
-
- keyware-repos-2
- KeyWare Repository-2
- http://218.30.67.85:19201/nexus/content/repositories/releases/
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -126,8 +100,8 @@
org.apache.maven.plugins
maven-compiler-plugin
- 16
- 16
+ 17
+ 17
diff --git a/src/main/java/com/keyware/composeanalysis/config/GlobalExceptionHandler.java b/src/main/java/com/keyware/composeanalysis/config/GlobalExceptionHandler.java
new file mode 100644
index 0000000..14c2da8
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/config/GlobalExceptionHandler.java
@@ -0,0 +1,32 @@
+package com.keyware.composeanalysis.config;
+
+import com.keyware.common.base.response.Result;
+import com.keyware.common.constant.enums.ResultCode;
+import com.keyware.common.exception.BusinessException;
+import lombok.extern.log4j.Log4j2;
+import org.springframework.web.bind.annotation.ExceptionHandler;
+import org.springframework.web.bind.annotation.ResponseBody;
+import org.springframework.web.bind.annotation.RestControllerAdvice;
+
+
+@Log4j2
+@RestControllerAdvice
+public class GlobalExceptionHandler {
+
+ // Catch-all handler: logs the exception with its stack trace and answers with ResultCode.FAIL plus the exception message.
+ @ExceptionHandler(value = Exception.class)
+ public Result defaultErrorHandler(Exception e) {
+ log.error("全局异常信息,ex={}",e.getMessage(),e);
+ return Result.fail(ResultCode.FAIL.getCode(), e.getMessage());
+ }
+
+ // Custom exception handling
+ // Business exceptions: logged, then answered with the code and message carried by the exception itself.
+ @ExceptionHandler(value = BusinessException.class)
+ @ResponseBody
+ public Result businessExceptionHandler(BusinessException e) {
+ log.error("业务异常信息",e);
+ return Result.fail(e.getCode(), e.getMsg());
+ }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java
index cb279e7..8d342d5 100644
--- a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java
+++ b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java
@@ -19,15 +19,11 @@ import java.util.List;
@Accessors(chain =true)
public class MatchOpenFile implements Serializable {
- //ID
@Id
private String id;
- //开源项目版本名称
- private String version;
-
- //组件版本id
- private String versionId;
+ // Name of the matched open-source file
+ private String fileName;
//开源项目名称
private String pName;
@@ -35,28 +31,28 @@ public class MatchOpenFile implements Serializable {
//开源项目id
private String pId;
+ // Version name of the project that contains the matched open-source file
+ private String version;
+
+ // Version id
+ private String versionId;
+
+ // Full path of the open-source file
+ private String sourceFilePath;
+
//与被测文件的特征相似度
private Float featureSimilarity;
+ // Open-source rate of the file
+ private Float openRate;
+
//开源地址
private String sourceUrl;
- //开源文件的详细路径
- private String sourceFilePath;
-
//开源许可协议类型
private List licenseType;
- //长度
- private Integer fileSize;
-
//文件MD5值
private String md5;
- //分析类型 (0文件,1函数 2:代码块 3:行)
- private int analyzeType;
-
- //文件开源率
- private Float openRate;
-
}
diff --git a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java
index 55b663e..2033db8 100644
--- a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java
+++ b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java
@@ -20,39 +20,32 @@ import java.util.List;
@Accessors(chain =true)
public class MatchOpenFileMongoDto implements Serializable {
+ // Document id
@Id
private String id;
- /**
- * 文件名称
- */
+ // File name
private String fileName;
- /**
- * 文件路径
- */
- @Deprecated
+ // File path
private String filePath;
- /**
- * 当前 文件 是否 开源 false:不开源 true:开源
- */
- private Boolean openType;
+ // Analysis type (the analysis tasks store an AnalysisLevelEnum code here)
+ private Integer analysisType;
+ // Feature similarity with the file under analysis
+ private Float featureSimilarity;
- /**
- * 当前文件的开源率
- */
- private float openRate;
+ // Open-source rate of the file
+ private Float openRate;
- /**
- * 特征相似度
- */
- private Float featureSimilarity;
+ // MD5 of the file
+ private String md5;
+
+ // Whether the current file is considered open source
+ private Boolean openType;
- /**
- * 匹配的开源文件信息
- */
- List matchOpenFile;
+ // Matched open-source files; private like every other field of this DTO
+ private List subMatchOpenFiles;
}
diff --git a/src/main/java/com/keyware/composeanalysis/solr/FunctionInfo.java b/src/main/java/com/keyware/composeanalysis/solr/FunctionInfo.java
new file mode 100644
index 0000000..ebe77fa
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/solr/FunctionInfo.java
@@ -0,0 +1,26 @@
+package com.keyware.composeanalysis.solr;
+
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+import java.math.BigDecimal;
+
+/**
+ * @author liuzongren
+ * @date 2024/9/23
+ * @description Function structure object as stored in the Solr store
+ */
+@Data
+@Accessors(chain = true)
+public class FunctionInfo {
+ // Function name
+ private String funName;
+ // MD5 of the feature (trait) function
+ private String traitFunMd5;
+ // Function length in characters
+ private BigDecimal funSize;
+ // Number of effective code lines in the function
+ private int codeRowNum;
+ // MD5 of the original function
+ private String cutFunMd5;
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
index c6f92ad..ec4dc35 100644
--- a/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
+++ b/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
@@ -3,6 +3,7 @@ package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import com.alibaba.fastjson.JSONArray;
import com.keyware.common.constant.enums.AnalysisStatusEnum;
@@ -203,16 +204,20 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
analysisFile.setOpenType(true);
}
+
+ //保存当前文件开源行数
+ analysisFile.setOpenLineCount(matchedLineRowsNum.size());
+
//保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
- .setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName())
+ .setFilePath(analysisFile.getFileUrl())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType())
- .setMatchOpenFile(matchOpenFilesRes);
-
+ .setAnalysisType(AnalysisLevelEnum.BLOCK_LEVEL.getCode())
+ .setSubMatchOpenFiles(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo);
}
@@ -241,6 +246,13 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
//按照特征行进行分组,一次匹配中,将所有的特征行进行累加
Map traitsFeatureMd5AndFeatureLineNumMap = getTraitsFeatureMd5AndFeatureLineNumMap(fileAnalysisRes.getLine_hay());
+
+ //被测件文本内容
+ String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
+
+ //将文本内容解析成行信息,用于后续文件的开源率计算
+ List analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
+
for (SolrDocument matchFile : matchOpenFiles) {
//开源文件md5
@@ -280,7 +292,7 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
}
//当前文件的开源率
- Pair> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent.getFieldValue("sourceContent").toString());
+ Pair> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent.getFieldValue("sourceContent").toString());
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@@ -295,16 +307,21 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
continue;
}
+ String openFilePath = (String) openEntries.get("fullPath");
//组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
- matchOpenFileInfo.setPId(versionInfo.getProId())
+ matchOpenFileInfo.setId(IdGenerator.uuid32())
+ .setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName())
- .setSourceUrl((String) openEntries.get("fullPath"))
- .setFeatureSimilarity(featureSimilarity.floatValue())
- .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setPId(versionInfo.getProId())
.setVersion(versionInfo.getVersionName())
+ .setVersionId(versionInfo.getVersionId())
+ .setSourceFilePath(openFilePath)
+ .setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType())
- .setAnalyzeType(AnalysisLevelEnum.BLOCK_LEVEL.getCode());
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setMd5(openSourceFileMd5);
matchOpenFilesRes.add(matchOpenFileInfo);
}
return matchOpenFilesRes;
diff --git a/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
index 339d0b8..97fb6c3 100644
--- a/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
+++ b/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
@@ -1,5 +1,7 @@
package com.keyware.composeanalysis.task;
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.FixedValue;
@@ -25,11 +27,8 @@ import org.springframework.data.mongodb.core.query.Update;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
-import java.nio.file.Files;
-import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.CountDownLatch;
-import java.util.function.Function;
import java.util.stream.Collectors;
import static org.springframework.data.mongodb.core.query.Criteria.where;
@@ -140,6 +139,10 @@ public class FileAnalysisTask extends IAnalysisTask {
*/
private void ananlyzeFileOpenRate(SolrDocumentList openSourceFileList, CodeFile fileAnalysisRes) {
+ if (CollUtil.isEmpty(openSourceFileList)){
+ return;
+ }
+
HashSet openLineNum = new HashSet<>();
//计算每个文件的开源率和特征相似度
@@ -148,7 +151,7 @@ public class FileAnalysisTask extends IAnalysisTask {
//获取开源率阈值,判断当前文件是否开源
Integer openRateThreshold = analysisTask.getOpenRateThreshold();
- BigDecimal openRate = new BigDecimal(openLineNum.size()).divide(fileAnalysisRes.getCodeRowNum(), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+ BigDecimal openRate = new BigDecimal(openLineNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//超过阈值,则认为当前文件是开源文件
if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) {
@@ -163,17 +166,18 @@ public class FileAnalysisTask extends IAnalysisTask {
//保存当前文件的开源信息到mongo库中
- MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
- matchOpenFileMongo.setId(IdGenerator.uuid32())
- .setFilePath(analysisFile.getFileUrl())
+ MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto();
+ matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFileName(analysisFile.getName())
+ .setFilePath(analysisFile.getFileUrl())
+ .setOpenType(true)
.setFeatureSimilarity(100.00f)
.setOpenRate(openRate.floatValue())
- .setOpenType(analysisFile.getOpenType())
- .setMatchOpenFile(matchOpenFilesRes);
+ .setAnalysisType(AnalysisLevelEnum.FILE_LEVEL.getCode())
+ .setSubMatchOpenFiles(matchOpenFilesRes);
//保存当前开源信息数据
- mongoTemplate.insert(matchOpenFileMongo);
+ mongoTemplate.insert(matchOpenFileInfo);
}
@@ -202,6 +206,13 @@ public class FileAnalysisTask extends IAnalysisTask {
List versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
Map versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));
+
+ //被测件文本内容
+ String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
+
+ //将文本内容解析成行信息,用于后续文件的开源率计算
+ List analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
+
for (SolrDocument openSourceFile : matchOpenFiles) {
//开源文件md5
@@ -210,7 +221,7 @@ public class FileAnalysisTask extends IAnalysisTask {
String openFileContent = solrUtils.getOpenFileContentByMd5(openSourceFileMd5);
//当前文件的开源率
- Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openFileContent);
+ Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openFileContent);
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@@ -220,17 +231,21 @@ public class FileAnalysisTask extends IAnalysisTask {
log.error("找不到开源文件版本信息,versionId:{}", openEntries.get("versionId"));
}
+ String openFilePath = (String) openEntries.get("fullPath");
//组装当前开源文件的开源项目信息
- MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
- matchOpenFileInfo.setPId(versionInfo.getProId())
+ MatchOpenFile matchOpenFile = new MatchOpenFile();
+ matchOpenFile.setId(IdGenerator.uuid32())
+ .setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName())
- .setSourceUrl((String) openEntries.get("fullPath"))
- .setFeatureSimilarity(100.00f)
- .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setPId(versionInfo.getProId())
.setVersion(versionInfo.getVersionName())
+ .setVersionId(versionInfo.getVersionId())
+ .setSourceFilePath(openFilePath)
+ .setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType())
- .setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode());
- matchOpenFilesRes.add(matchOpenFileInfo);
+ .setFeatureSimilarity(100.00f)
+ .setOpenRate(openRateAndSaveRowNum.getKey());
+ matchOpenFilesRes.add(matchOpenFile);
}
return matchOpenFilesRes;
}
diff --git a/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
index e4f9884..40c5118 100644
--- a/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
+++ b/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
@@ -3,9 +3,11 @@ package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import com.alibaba.fastjson.JSONArray;
import com.keyware.common.constant.enums.AnalysisStatusEnum;
+import com.keyware.common.exception.BusinessException;
import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.SolrDBConst;
@@ -16,6 +18,7 @@ import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.LineDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFile;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
+import com.keyware.composeanalysis.solr.FunctionInfo;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.util.*;
import com.keyware.keyswan.common.LineModel;
@@ -101,7 +104,11 @@ public class FunctionAnalysisTask extends IAnalysisTask {
String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
//根据文件的名称获取函数解析器
- Analysis analysis = AnalysisFactory.getAnalysis(filePath);
+ Analysis analysis = AnalysisFactory.getAnalysis(fileName);
+
+ if (analysis == null){
+ throw new BusinessException("获取文件解析器失败,文件名称:"+fileName);
+ }
//解析文件
CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath));
@@ -123,7 +130,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
log.info("文件" + fileName + ":函数级分析完成");
} catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e);
- log.error("文件:" + fileName + "函数级别特征提取失败!", e);
+ log.error("文件:" + fileName + "【函数级级分析】失败!", e);
//修改当前文件分析状态未失败
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
@@ -152,7 +159,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
Map> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));
//函数代码总函数
- int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum();
+ BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());
//匹配到的特征函数Md5
Set matchFeatureFunctionMd5s = new HashSet();
@@ -169,7 +176,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum();
}
- BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+ BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//计算文件的总体开源率
BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
@@ -182,16 +189,19 @@ public class FunctionAnalysisTask extends IAnalysisTask {
analysisFile.setOpenType(true);
}
+ //保存当前文件开源行数
+ analysisFile.setOpenLineCount(matchOpenLineRowsNum.size());
+
//保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
- .setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName())
+ .setFilePath(analysisFile.getFileUrl())
+ .setOpenType(analysisFile.getOpenType())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue())
- .setOpenType(analysisFile.getOpenType())
- .setMatchOpenFile(matchOpenFilesRes);
-
+ .setAnalysisType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode())
+ .setSubMatchOpenFiles(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo);
}
@@ -226,13 +236,20 @@ public class FunctionAnalysisTask extends IAnalysisTask {
//函数总行数
BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());
+
+ //被测件文本内容
+ String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
+
+ //将文本内容解析成行信息,用于后续文件的开源率计算
+ List analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
+
for (SolrDocument openSourceFile : matchOpenFiles) {
//开源文件md5
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString();
//解析文件的函数特征值
- List openFileFunctionList = getOpenFileFunctionList(openSourceFile);
+ List openFileFunctionList = getOpenFileFunctionList(openSourceFile);
//根据源文件的MD5确定需要查询源码库的序号
String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
@@ -250,8 +267,8 @@ public class FunctionAnalysisTask extends IAnalysisTask {
for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) {
List currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5);
//源文件的特征函数列表
- for (Function openFunction : openFileFunctionList) {
- if (funFeatureMd5.equals(openFunction.getMd5())) {
+ for (FunctionInfo openFunction : openFileFunctionList) {
+ if (funFeatureMd5.equals(openFunction.getTraitFunMd5())) {
//每个特征函数 不能多次匹配,影响整体特征相似度
//匹配成功后,相同的特征行 一并加上
if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) {
@@ -264,7 +281,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
}
//当前文件的开源率
- Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString());
+ Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent.getFieldValue("sourceContent").toString());
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@@ -273,16 +290,22 @@ public class FunctionAnalysisTask extends IAnalysisTask {
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
+ String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
+
//组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
- matchOpenFileInfo.setPId(versionInfo.getProId())
+ matchOpenFileInfo.setId(IdGenerator.uuid32())
+ .setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName())
- .setSourceUrl((String) openEntries.get("fullPath"))
- .setFeatureSimilarity(featureSimilarity.floatValue())
- .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setPId(versionInfo.getProId())
.setVersion(versionInfo.getVersionName())
+ .setVersionId(versionInfo.getVersionId())
+ .setSourceFilePath(openFilePath)
+ .setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType())
- .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setMd5(openSourceFileMd5);
matchOpenFilesRes.add(matchOpenFileInfo);
}
return matchOpenFilesRes;
@@ -316,18 +339,20 @@ public class FunctionAnalysisTask extends IAnalysisTask {
* @param matchOpenFile
* @return
*/
- private List getOpenFileFunctionList(SolrDocument matchOpenFile) {
+ private List getOpenFileFunctionList(SolrDocument matchOpenFile) { // parses the "fun_hay" field of a matched Solr document into FunctionInfo entries
try {
//解析文件的函数特征值
String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString();
- lineFeatureMd5s = lineFeatureMd5s.replace("\\", "")
+ lineFeatureMd5s = lineFeatureMd5s
+ .replace("\\\\\\\"", "") // first drop every sequence of three backslashes followed by a double quote
+ .replace("\\", "") // then strip all remaining backslashes
.replace("\"{", "{")
.replace("}\"", "}");
- return JSONArray.parseArray(lineFeatureMd5s, Function.class);
+ return JSONArray.parseArray(lineFeatureMd5s, FunctionInfo.class);
} catch (Exception e) {
log.error("解析文件特征值失败", e);
}
- return new ArrayList();
+ return new ArrayList<>(); // parsing failed (already logged): fall back to an empty list
}
/**
diff --git a/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
index ed0588c..4489aed 100644
--- a/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
+++ b/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
@@ -2,6 +2,7 @@ package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.StrUtil;
import com.keyware.common.constant.enums.AnalysisStatusEnum;
@@ -170,6 +171,9 @@ public class LineAnalysisTask extends IAnalysisTask {
analysisFile.setOpenType(true);
}
+ //保存当前文件开源行数
+ analysisFile.setOpenLineCount(matchLineRowsNum.size());
+
//保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
@@ -178,8 +182,8 @@ public class LineAnalysisTask extends IAnalysisTask {
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType())
- .setMatchOpenFile(matchOpenFilesRes);
- log.info("文件" + analysisFile.getName() + ":开源率:" + openRate.floatValue() + ",特征相似度:" + featureSimilarity.floatValue());
+ .setAnalysisType(AnalysisLevelEnum.LINE_LEVEL.getCode())
+ .setSubMatchOpenFiles(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo);
}
@@ -211,6 +215,12 @@ public class LineAnalysisTask extends IAnalysisTask {
String traitFileLineMd5 = fileAnalysisRes.getTraitFileLineMd5();
List lineFeatureList = Arrays.asList(traitFileLineMd5.split(","));
+ //被测件文本内容
+ String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
+
+ //将文本内容解析成行信息,用于后续文件的开源率计算
+ List analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
+
for (SolrDocument openSourceFile : matchOpenFiles) {
//开源文件MD5
@@ -239,7 +249,7 @@ public class LineAnalysisTask extends IAnalysisTask {
}
//当前文件的开源率
- Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent);
+ Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent);
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@@ -254,16 +264,21 @@ public class LineAnalysisTask extends IAnalysisTask {
continue;
}
+ String openFilePath = (String) openEntries.get("fullPath");
//组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
- matchOpenFileInfo.setPId(versionInfo.getProId())
+ matchOpenFileInfo.setId(IdGenerator.uuid32())
+ .setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName())
- .setSourceUrl((String) openEntries.get("fullPath"))
- .setFeatureSimilarity(featureSimilarity.floatValue())
- .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setPId(versionInfo.getProId())
.setVersion(versionInfo.getVersionName())
+ .setVersionId(versionInfo.getVersionId())
+ .setSourceFilePath(openFilePath)
+ .setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType())
- .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setMd5(openSourceFileMd5);
matchOpenFilesRes.add(matchOpenFileInfo);
}
return matchOpenFilesRes;
diff --git a/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
index 40561e1..8306735 100644
--- a/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
+++ b/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
@@ -1,6 +1,7 @@
package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.io.FileUtil;
import com.google.common.collect.Sets;
import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.FixedValue;
@@ -14,14 +15,12 @@ import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.solr.VersionTreeNode;
import com.keyware.composeanalysis.util.AnalysisLogUtil;
import com.keyware.composeanalysis.util.SolrUtils;
-import com.keyware.composeanalysis.util.SpringContextUtils;
import com.keyware.utils.IdGenerator;
import com.mongodb.client.MongoClient;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument;
-import org.springframework.core.task.TaskExecutor;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.core.query.Update;
@@ -48,7 +47,6 @@ public class PorjectAnalysisTask {
private AnalysisTask analysisTask;
private AnalysisTaskServiceImpl analysisService;
private SolrUtils solrUtils;
- private TaskExecutor taskExecutor;
/**
* 项目级分析
@@ -64,7 +62,6 @@ public class PorjectAnalysisTask {
this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
this.analysisTask = analysisTask;
this.solrUtils = solrUtils;
- this.taskExecutor = SpringContextUtils.getBean(TaskExecutor.class);
}
@@ -123,8 +120,6 @@ public class PorjectAnalysisTask {
//当前文件开源信息存入数据库中
mongoTemplate.insert(projectAssembly);
- analysisService.updateById(analysisTask);
-
//更新文件分析的状态
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("isDirectory").is(false))
@@ -196,12 +191,13 @@ public class PorjectAnalysisTask {
Map md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
//如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配
- updateFileAnalysisStatus(fileMd5s);
+ updateFileAnalysisStatus(fileMd5s,true);
return;
}
saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);
- //直接更改没有匹配的文件分析状态
- updateFileAnalysisStatus(Sets.difference(fileMd5s, md5VersionIdMap.keySet()));
+ //直接更改没有匹配的文件分析状态,因为没有匹配上的文件,无法进行下一步的匹配(下一步的匹配只针对32种主流的语言)
+ Set notMatchFileMd5s = Sets.difference(fileMd5s, md5VersionIdMap.keySet());
+ updateFileAnalysisStatus(notMatchFileMd5s,false);
}
}
@@ -231,7 +227,7 @@ public class PorjectAnalysisTask {
if (CollectionUtils.isNotEmpty(batchInsertCache)) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
//更新文件分析的状态
- updateFileAnalysisStatus(md5VersionIdMap.keySet());
+ updateFileAnalysisStatus(md5VersionIdMap.keySet(),true);
}
}
@@ -266,7 +262,7 @@ public class PorjectAnalysisTask {
}
//更新文件分析的状态
- updateFileAnalysisStatus(matchedMd5s);
+ updateFileAnalysisStatus(matchedMd5s,true);
}
//获取匹配到的开源文件信息
@@ -274,14 +270,15 @@ public class PorjectAnalysisTask {
//设置匹配文件的信息
MatchOpenFile matchOpenFile = new MatchOpenFile();
matchOpenFile.setId(IdGenerator.uuid32())
+ .setFileName(FileUtil.getName(openFilePath))
+ .setPName(versionInfo.getProName())
+ .setPId(versionInfo.getProId())
+ .setVersion(versionInfo.getVersionName())
.setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl())
- .setPId(versionInfo.getProId())
- .setPName(versionInfo.getProName())
.setLicenseType(versionInfo.getLicenseType())
- .setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode())
- .setVersion(versionInfo.getVersionName())
+ .setMd5(originalFile.getMd5())
.setFeatureSimilarity(100.00f)
.setOpenRate(100.00f);
@@ -293,7 +290,8 @@ public class PorjectAnalysisTask {
.setOpenType(true)
.setFeatureSimilarity(100.00f)
.setOpenRate(100.00f)
- .setMatchOpenFile(Arrays.asList(matchOpenFile));
+ .setAnalysisType(AnalysisLevelEnum.FILE_LEVEL.getCode())
+ .setSubMatchOpenFiles(Arrays.asList(matchOpenFile));
return matchOpenFileInfo;
}
@@ -325,8 +323,8 @@ public class PorjectAnalysisTask {
break;
}
- //异步保存匹配的开源文件信息
- taskExecutor.execute(() -> saveProjectOpenInfo(openProject, projectFiles));
+ //保存匹配的开源文件信息
+ saveProjectOpenInfo(openProject, projectFiles);
//获取开源项目的所有文件md5集合
List openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList());
@@ -339,8 +337,8 @@ public class PorjectAnalysisTask {
//计算与当前项目的相似度
BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
- //当相似度小于30%,不保存项目级的信息
- if (semblance.compareTo(new BigDecimal(30)) < 0){
+ //当相似度小于20%,不保存项目级的信息
+ if (semblance.compareTo(new BigDecimal(20)) < 0){
break;
}
@@ -366,11 +364,11 @@ public class PorjectAnalysisTask {
}
//更新文件分析的状态
- private void updateFileAnalysisStatus(Set fileMd5Set) {
+ private void updateFileAnalysisStatus(Set fileMd5Set, boolean openType) { // primitive flag: a null Boolean would throw NPE when unboxed in the ternary below
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("md5").in(fileMd5Set))
- .apply(new Update().set("openType", true)
- .set("openRate", 100.00f)
+ .apply(new Update().set("openType", openType)
+ .set("openRate", openType ? 100.00f : 0f) // matched files are recorded as 100% open, unmatched ones as 0%
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
.all();
}
diff --git a/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java b/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java
index cbc1565..8f57954 100644
--- a/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java
+++ b/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java
@@ -1,5 +1,6 @@
package com.keyware.composeanalysis.util;
+import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.ByteUtil;
@@ -54,65 +55,6 @@ public class SimilarityUtil {
}
- /**
- * 获取开源率和开源行号
- * @param analysisFile 被测件内容
- * @param openSourceFile 开源文件内容
- * @return
- */
-// public static Pair> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) {
-// if (StrUtil.hasBlank(analysisFile,openSourceFile)){
-// return new Pair<>(0.00f,new HashSet<>());
-// }
-// //匹配到的行号
-// HashSet matchedRowsNum = new HashSet<>();
-//
-// //被测件文件行
-// List analysisFileLineInfo = getSplitWords(analysisFile);
-//
-// //溯源到文件行
-// HashSet openSourceFileLineInfo = getSplitWords1(openSourceFile);
-//
-// for (int i = 0; i < analysisFileLineInfo.size(); i++) {
-// String sent1Word = analysisFileLineInfo.get(i);
-// if (openSourceFileLineInfo.contains(sent1Word)) {
-// matchedRowsNum.add(i);
-// }
-// }
-//
-// //计算开源率
-// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
-//
-// return new Pair<>(openRate.toString(), matchedRowsNum);
-// }
-
-
-// public static Pair> getOpenRateAndSaveRowNum(byte[] analysisFile, byte[] openSourceFile) {
-// if (ArrayUtil.hasNull(analysisFile,openSourceFile)){
-// return new Pair<>(0.00f,new HashSet<>());
-// }
-// //匹配到的行号
-// HashSet matchedRowsNum = new HashSet<>();
-//
-// //被测件文件行
-// List analysisFileLineInfo = getSplitWords(new String(analysisFile));
-//
-// //溯源到文件行
-// HashSet openSourceFileLineInfo = getSplitWords1(new String(openSourceFile));
-//
-// for (int i = 0; i < analysisFileLineInfo.size(); i++) {
-// String sent1Word = analysisFileLineInfo.get(i);
-// if (openSourceFileLineInfo.contains(sent1Word)) {
-// matchedRowsNum.add(i);
-// }
-// }
-//
-// //计算开源率
-// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
-//
-// return new Pair<>(openRate.toString(), matchedRowsNum);
-// }
-
public static Pair> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) {
if (StrUtil.hasBlank(analysisFile,openSourceFile)){
return new Pair<>(0.00f,new HashSet<>());
@@ -140,6 +82,35 @@ public class SimilarityUtil {
}
+ public static Pair> getOpenRateAndSaveRowNum(List analysisFileLineInfo , String openSourceFile) { // overload taking the analysed file already split into lines; returns (open rate in percent, matched line indexes)
+ if (CollUtil.isEmpty(analysisFileLineInfo) || StrUtil.isBlank(openSourceFile)){
+ return new Pair<>(0.00f,new HashSet<>());
+ }
+
+ // Zero-based indexes of the analysed lines that also occur in the open-source file
+ HashSet matchedRowsNum = new HashSet<>();
+
+ // Lines of the open-source file, split by getSplitWords1
+ HashSet openSourceFileLineInfo = getSplitWords1(openSourceFile);
+
+ for (int i = 0; i < analysisFileLineInfo.size(); i++) {
+ String sent1Word = analysisFileLineInfo.get(i);
+ if (openSourceFileLineInfo.contains(sent1Word)) {
+ matchedRowsNum.add(i);
+ }
+ }
+
+ // Open rate = matched lines / total analysed lines, expressed as a percentage with two decimals
+ BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ return new Pair<>(openRate.floatValue(), matchedRowsNum);
+ }
+
+
+
+
+
+
/**
* 获得两个文件的相似度,并将被匹配的行
* @param matchLineInfos 被匹配的行信息
diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml
index fc22299..e9f435a 100644
--- a/src/main/resources/application.yaml
+++ b/src/main/resources/application.yaml
@@ -1,12 +1,14 @@
spring:
+ application:
+ name: compose-analysis
cloud:
nacos:
discovery:
- server-addr: 172.16.36.7:8848
- namespace: 2fad0ca9-bc32-4afd-9f2e-ebc133d5e781
+ server-addr: 127.0.0.1:8848
+ namespace: 4ce70f33-8b88-4931-a88c-2b68e7259bd7
config:
- server-addr: 172.16.36.7:8848
- namespace: 2fad0ca9-bc32-4afd-9f2e-ebc133d5e781
+ server-addr: 127.0.0.1:8848
+ namespace: 4ce70f33-8b88-4931-a88c-2b68e7259bd7
file-extension: yaml
config:
import: nacos:compose-analysis-dev.yaml