1.优化相似度对比逻辑

master
liuzongren 7 months ago
parent 380e5b282a
commit 890ed17d84
  1. 44
      pom.xml
  2. 32
      src/main/java/com/keyware/composeanalysis/config/GlobalExceptionHandler.java
  3. 32
      src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java
  4. 39
      src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java
  5. 26
      src/main/java/com/keyware/composeanalysis/solr/FunctionInfo.java
  6. 35
      src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
  7. 51
      src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
  8. 67
      src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
  9. 31
      src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
  10. 44
      src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
  11. 89
      src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java
  12. 10
      src/main/resources/application.yaml

@ -14,10 +14,6 @@
<name>compose-analysis</name> <name>compose-analysis</name>
<description>compose-analysis 源码溯源服务</description> <description>compose-analysis 源码溯源服务</description>
<properties>
<java.version>17</java.version>
</properties>
<dependencies> <dependencies>
<dependency> <dependency>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
@ -30,6 +26,13 @@
<artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId> <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
</dependency> </dependency>
<!--客户端负载均衡loadbalancer-->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-loadbalancer</artifactId>
</dependency>
<!-- nacos 配置中心做依赖管理 --> <!-- nacos 配置中心做依赖管理 -->
<dependency> <dependency>
<groupId>com.alibaba.cloud</groupId> <groupId>com.alibaba.cloud</groupId>
@ -86,35 +89,6 @@
</dependencies> </dependencies>
<repositories>
<repository>
<id>keyware-repos</id>
<name>KeyWare Repository</name>
<url>http://218.30.67.85:19201/nexus/content/groups/public/</url>
</repository>
<repository>
<id>keyware-repos-2</id>
<name>KeyWare Repository-2</name>
<url>http://218.30.67.85:19201/nexus/content/repositories/releases/</url>
</repository>
<!-- <repository>-->
<!-- <id>aliyun-repository</id>-->
<!-- <name>aliyun repository</name>-->
<!-- <url>https://maven.aliyun.com/repository/public/</url>-->
<!-- </repository>-->
<!-- <repository>-->
<!-- <id>aliyun-repos</id>-->
<!-- <name>Aliyun Repository</name>-->
<!-- <url>http://maven.aliyun.com/nexus/content/groups/public</url>-->
<!-- <releases>-->
<!-- <enabled>true</enabled>-->
<!-- </releases>-->
<!-- <snapshots>-->
<!-- <enabled>false</enabled>-->
<!-- </snapshots>-->
<!-- </repository>-->
</repositories>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@ -126,8 +100,8 @@
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId> <artifactId>maven-compiler-plugin</artifactId>
<configuration> <configuration>
<source>16</source> <source>17</source>
<target>16</target> <target>17</target>
</configuration> </configuration>
</plugin> </plugin>
</plugins> </plugins>

@ -0,0 +1,32 @@
package com.keyware.composeanalysis.config;
import com.keyware.common.base.response.Result;
import com.keyware.common.constant.enums.ResultCode;
import com.keyware.common.exception.BusinessException;
import lombok.extern.log4j.Log4j2;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestControllerAdvice;
@Log4j2
@RestControllerAdvice
public class GlobalExceptionHandler {
//全局异常处理
@ExceptionHandler(value = Exception.class)
public Result defaultErrorHandler(Exception e) {
log.error("全局异常信息,ex={}",e.getMessage(),e);
return Result.fail(ResultCode.FAIL.getCode(), e.getMessage());
}
//自定义异常处理
//业务异常
@ExceptionHandler(value = BusinessException.class)
@ResponseBody
public Result businessExceptionHandler(BusinessException e) {
log.error("业务异常信息",e);
return Result.fail(e.getCode(), e.getMsg());
}
}

@ -19,15 +19,11 @@ import java.util.List;
@Accessors(chain =true) @Accessors(chain =true)
public class MatchOpenFile implements Serializable { public class MatchOpenFile implements Serializable {
//ID
@Id @Id
private String id; private String id;
//开源项目版本名称 //匹配的开源文件名称
private String version; private String fileName;
//组件版本id
private String versionId;
//开源项目名称 //开源项目名称
private String pName; private String pName;
@ -35,28 +31,28 @@ public class MatchOpenFile implements Serializable {
//开源项目id //开源项目id
private String pId; private String pId;
//匹配的开源文件所在项目版本名称
private String version;
//版本id
private String versionId;
//开源文件的详细路径
private String sourceFilePath;
//与被测文件的特征相似度 //与被测文件的特征相似度
private Float featureSimilarity; private Float featureSimilarity;
//文件开源率
private Float openRate;
//开源地址 //开源地址
private String sourceUrl; private String sourceUrl;
//开源文件的详细路径
private String sourceFilePath;
//开源许可协议类型 //开源许可协议类型
private List<String> licenseType; private List<String> licenseType;
//长度
private Integer fileSize;
//文件MD5值 //文件MD5值
private String md5; private String md5;
//分析类型 (0文件,1函数 2:代码块 3:行)
private int analyzeType;
//文件开源率
private Float openRate;
} }

@ -20,39 +20,32 @@ import java.util.List;
@Accessors(chain =true) @Accessors(chain =true)
public class MatchOpenFileMongoDto implements Serializable { public class MatchOpenFileMongoDto implements Serializable {
//ID
@Id @Id
private String id; private String id;
/** //文件名称
* 文件名称
*/
private String fileName; private String fileName;
/** //文件路径
* 文件路径
*/
@Deprecated
private String filePath; private String filePath;
/** //分析类型
* 当前 文件 是否 开源 false:不开源 true:开源 private Integer analysisType;
*/
private Boolean openType;
//与被测文件的特征相似度
private Float featureSimilarity;
/** //文件开源率
* 当前文件的开源率 private Float openRate;
*/
private float openRate;
/** //文件MD5值
* 特征相似度 private String md5;
*/
private Float featureSimilarity;
/** //当前文件是否开源
* 匹配的开源文件信息 private Boolean openType;
*/
List<MatchOpenFile> matchOpenFile; //匹配的开源文件信息
List<MatchOpenFile> subMatchOpenFiles;
} }

@ -0,0 +1,26 @@
package com.keyware.composeanalysis.solr;
import lombok.Data;
import lombok.experimental.Accessors;
import java.math.BigDecimal;
/**
* Function structure object as stored in the Solr library.
* Lombok generates the accessors; setters are chainable ({@code @Accessors(chain = true)}).
*
* @author liuzongren
* @date 2024/9/23
* @description function structure object in the Solr library
*/
@Data
@Accessors(chain = true)
public class FunctionInfo {
// Function name
private String funName;
// MD5 of the feature (trait) function
private String traitFunMd5;
// Function length in characters
private BigDecimal funSize;
// Number of effective code lines in the function
private int codeRowNum;
// MD5 of the original function
private String cutFunMd5;
}

@ -3,6 +3,7 @@ package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
@ -203,16 +204,20 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
analysisFile.setOpenType(true); analysisFile.setOpenType(true);
} }
//保存当前文件开源行数
analysisFile.setOpenLineCount(matchedLineRowsNum.size());
//保存当前文件的开源信息到mongo库中 //保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32()) matchOpenFileMongo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName()) .setFileName(analysisFile.getName())
.setFilePath(analysisFile.getFileUrl())
.setFeatureSimilarity(featureSimilarity.floatValue()) .setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue()) .setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType()) .setOpenType(analysisFile.getOpenType())
.setMatchOpenFile(matchOpenFilesRes); .setAnalysisType(AnalysisLevelEnum.BLOCK_LEVEL.getCode())
.setSubMatchOpenFiles(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo); mongoTemplate.save(matchOpenFileMongo);
} }
@ -241,6 +246,13 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
//按照特征行进行分组,一次匹配中,将所有的特征行进行累加 //按照特征行进行分组,一次匹配中,将所有的特征行进行累加
Map<String, Integer> traitsFeatureMd5AndFeatureLineNumMap = getTraitsFeatureMd5AndFeatureLineNumMap(fileAnalysisRes.getLine_hay()); Map<String, Integer> traitsFeatureMd5AndFeatureLineNumMap = getTraitsFeatureMd5AndFeatureLineNumMap(fileAnalysisRes.getLine_hay());
//被测件文本内容
String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
//将文本内容解析成行信息,用于后续文件的开源率计算
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
for (SolrDocument matchFile : matchOpenFiles) { for (SolrDocument matchFile : matchOpenFiles) {
//开源文件md5 //开源文件md5
@ -280,7 +292,7 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
} }
//当前文件的开源率 //当前文件的开源率
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent.getFieldValue("sourceContent").toString()); Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent.getFieldValue("sourceContent").toString());
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率 //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@ -295,16 +307,21 @@ public class CodeBlockAnalysisTask extends IAnalysisTask {
continue; continue;
} }
String openFilePath = (String) openEntries.get("fullPath");
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId()) matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName()) .setPName(versionInfo.getProName())
.setSourceUrl((String) openEntries.get("fullPath")) .setPId(versionInfo.getProId())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setVersion(versionInfo.getVersionName()) .setVersion(versionInfo.getVersionName())
.setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType()) .setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.BLOCK_LEVEL.getCode()); .setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setMd5(openSourceFileMd5);
matchOpenFilesRes.add(matchOpenFileInfo); matchOpenFilesRes.add(matchOpenFileInfo);
} }
return matchOpenFilesRes; return matchOpenFilesRes;

@ -1,5 +1,7 @@
package com.keyware.composeanalysis.task; package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.FixedValue; import com.keyware.composeanalysis.constant.FixedValue;
@ -25,11 +27,8 @@ import org.springframework.data.mongodb.core.query.Update;
import java.io.IOException; import java.io.IOException;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.math.RoundingMode; import java.math.RoundingMode;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*; import java.util.*;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.function.Function;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import static org.springframework.data.mongodb.core.query.Criteria.where; import static org.springframework.data.mongodb.core.query.Criteria.where;
@ -140,6 +139,10 @@ public class FileAnalysisTask extends IAnalysisTask {
*/ */
private void ananlyzeFileOpenRate(SolrDocumentList openSourceFileList, CodeFile fileAnalysisRes) { private void ananlyzeFileOpenRate(SolrDocumentList openSourceFileList, CodeFile fileAnalysisRes) {
if (CollUtil.isEmpty(openSourceFileList)){
return;
}
HashSet<Integer> openLineNum = new HashSet<>(); HashSet<Integer> openLineNum = new HashSet<>();
//计算每个文件的开源率和特征相似度 //计算每个文件的开源率和特征相似度
@ -148,7 +151,7 @@ public class FileAnalysisTask extends IAnalysisTask {
//获取开源率阈值,判断当前文件是否开源 //获取开源率阈值,判断当前文件是否开源
Integer openRateThreshold = analysisTask.getOpenRateThreshold(); Integer openRateThreshold = analysisTask.getOpenRateThreshold();
BigDecimal openRate = new BigDecimal(openLineNum.size()).divide(fileAnalysisRes.getCodeRowNum(), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal openRate = new BigDecimal(openLineNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//超过阈值,则认为当前文件是开源文件 //超过阈值,则认为当前文件是开源文件
if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) { if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) {
@ -163,17 +166,18 @@ public class FileAnalysisTask extends IAnalysisTask {
//保存当前文件的开源信息到mongo库中 //保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32()) matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName()) .setFileName(analysisFile.getName())
.setFilePath(analysisFile.getFileUrl())
.setOpenType(true)
.setFeatureSimilarity(100.00f) .setFeatureSimilarity(100.00f)
.setOpenRate(openRate.floatValue()) .setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType()) .setAnalysisType(AnalysisLevelEnum.FILE_LEVEL.getCode())
.setMatchOpenFile(matchOpenFilesRes); .setSubMatchOpenFiles(matchOpenFilesRes);
//保存当前开源信息数据 //保存当前开源信息数据
mongoTemplate.insert(matchOpenFileMongo); mongoTemplate.insert(matchOpenFileInfo);
} }
@ -202,6 +206,13 @@ public class FileAnalysisTask extends IAnalysisTask {
List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds); List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity())); Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));
//被测件文本内容
String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
//将文本内容解析成行信息,用于后续文件的开源率计算
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
for (SolrDocument openSourceFile : matchOpenFiles) { for (SolrDocument openSourceFile : matchOpenFiles) {
//开源文件md5 //开源文件md5
@ -210,7 +221,7 @@ public class FileAnalysisTask extends IAnalysisTask {
String openFileContent = solrUtils.getOpenFileContentByMd5(openSourceFileMd5); String openFileContent = solrUtils.getOpenFileContentByMd5(openSourceFileMd5);
//当前文件的开源率 //当前文件的开源率
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openFileContent); Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openFileContent);
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率 //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@ -220,17 +231,21 @@ public class FileAnalysisTask extends IAnalysisTask {
log.error("找不到开源文件版本信息,versionId:{}", openEntries.get("versionId")); log.error("找不到开源文件版本信息,versionId:{}", openEntries.get("versionId"));
} }
String openFilePath = (String) openEntries.get("fullPath");
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFile = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId()) matchOpenFile.setId(IdGenerator.uuid32())
.setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName()) .setPName(versionInfo.getProName())
.setSourceUrl((String) openEntries.get("fullPath")) .setPId(versionInfo.getProId())
.setFeatureSimilarity(100.00f)
.setOpenRate(openRateAndSaveRowNum.getKey())
.setVersion(versionInfo.getVersionName()) .setVersion(versionInfo.getVersionName())
.setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType()) .setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()); .setFeatureSimilarity(100.00f)
matchOpenFilesRes.add(matchOpenFileInfo); .setOpenRate(openRateAndSaveRowNum.getKey());
matchOpenFilesRes.add(matchOpenFile);
} }
return matchOpenFilesRes; return matchOpenFilesRes;
} }

@ -3,9 +3,11 @@ package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONArray;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.common.exception.BusinessException;
import com.keyware.composeanalysis.constant.FixedValue; import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.RedisConst; import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.SolrDBConst; import com.keyware.composeanalysis.constant.SolrDBConst;
@ -16,6 +18,7 @@ import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.LineDataMongoDto; import com.keyware.composeanalysis.mongo.LineDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFile; import com.keyware.composeanalysis.mongo.MatchOpenFile;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
import com.keyware.composeanalysis.solr.FunctionInfo;
import com.keyware.composeanalysis.solr.VersionTree; import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.util.*; import com.keyware.composeanalysis.util.*;
import com.keyware.keyswan.common.LineModel; import com.keyware.keyswan.common.LineModel;
@ -101,7 +104,11 @@ public class FunctionAnalysisTask extends IAnalysisTask {
String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
//根据文件的名称获取函数解析器 //根据文件的名称获取函数解析器
Analysis analysis = AnalysisFactory.getAnalysis(filePath); Analysis analysis = AnalysisFactory.getAnalysis(fileName);
if (analysis == null){
throw new BusinessException("获取文件解析器失败,文件名称:"+fileName);
}
//解析文件 //解析文件
CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath)); CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath));
@ -123,7 +130,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
log.info("文件" + fileName + ":函数级分析完成"); log.info("文件" + fileName + ":函数级分析完成");
} catch (Exception e) { } catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e); AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e);
log.error("文件:" + fileName + "函数级别特征提取失败!", e); log.error("文件:" + fileName + "【函数级级分析】失败!", e);
//修改当前文件分析状态未失败 //修改当前文件分析状态未失败
mongoTemplate.update(FileDataMongoDto.class) mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId())) .matching(where("_id").is(analysisFile.getId()))
@ -152,7 +159,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5)); Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));
//函数代码总函数 //函数代码总函数
int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum(); BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());
//匹配到的特征函数Md5 //匹配到的特征函数Md5
Set<String> matchFeatureFunctionMd5s = new HashSet(); Set<String> matchFeatureFunctionMd5s = new HashSet();
@ -169,7 +176,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum(); matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum();
} }
BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//计算文件的总体开源率 //计算文件的总体开源率
BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
@ -182,16 +189,19 @@ public class FunctionAnalysisTask extends IAnalysisTask {
analysisFile.setOpenType(true); analysisFile.setOpenType(true);
} }
//保存当前文件开源行数
analysisFile.setOpenLineCount(matchOpenLineRowsNum.size());
//保存当前文件的开源信息到mongo库中 //保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32()) matchOpenFileMongo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName()) .setFileName(analysisFile.getName())
.setFilePath(analysisFile.getFileUrl())
.setOpenType(analysisFile.getOpenType())
.setFeatureSimilarity(featureSimilarity.floatValue()) .setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue()) .setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType()) .setAnalysisType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode())
.setMatchOpenFile(matchOpenFilesRes); .setSubMatchOpenFiles(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo); mongoTemplate.save(matchOpenFileMongo);
} }
@ -226,13 +236,20 @@ public class FunctionAnalysisTask extends IAnalysisTask {
//函数总行数 //函数总行数
BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum()); BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());
//被测件文本内容
String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
//将文本内容解析成行信息,用于后续文件的开源率计算
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
for (SolrDocument openSourceFile : matchOpenFiles) { for (SolrDocument openSourceFile : matchOpenFiles) {
//开源文件md5 //开源文件md5
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString(); String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString();
//解析文件的函数特征值 //解析文件的函数特征值
List<Function> openFileFunctionList = getOpenFileFunctionList(openSourceFile); List<FunctionInfo> openFileFunctionList = getOpenFileFunctionList(openSourceFile);
//根据源文件的MD5确定需要查询源码库的序号 //根据源文件的MD5确定需要查询源码库的序号
String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
@ -250,8 +267,8 @@ public class FunctionAnalysisTask extends IAnalysisTask {
for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) { for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) {
List<Function> currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5); List<Function> currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5);
//源文件的特征函数列表 //源文件的特征函数列表
for (Function openFunction : openFileFunctionList) { for (FunctionInfo openFunction : openFileFunctionList) {
if (funFeatureMd5.equals(openFunction.getMd5())) { if (funFeatureMd5.equals(openFunction.getTraitFunMd5())) {
//每个特征函数 不能多次匹配,影响整体特征相似度 //每个特征函数 不能多次匹配,影响整体特征相似度
//匹配成功后,相同的特征行 一并加上 //匹配成功后,相同的特征行 一并加上
if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) { if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) {
@ -264,7 +281,7 @@ public class FunctionAnalysisTask extends IAnalysisTask {
} }
//当前文件的开源率 //当前文件的开源率
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString()); Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent.getFieldValue("sourceContent").toString());
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率 //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@ -273,16 +290,22 @@ public class FunctionAnalysisTask extends IAnalysisTask {
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath");
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId()) matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName()) .setPName(versionInfo.getProName())
.setSourceUrl((String) openEntries.get("fullPath")) .setPId(versionInfo.getProId())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setVersion(versionInfo.getVersionName()) .setVersion(versionInfo.getVersionName())
.setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType()) .setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()); .setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setMd5(openSourceFileMd5);
matchOpenFilesRes.add(matchOpenFileInfo); matchOpenFilesRes.add(matchOpenFileInfo);
} }
return matchOpenFilesRes; return matchOpenFilesRes;
@ -316,18 +339,20 @@ public class FunctionAnalysisTask extends IAnalysisTask {
* @param matchOpenFile * @param matchOpenFile
* @return * @return
*/ */
private List<Function> getOpenFileFunctionList(SolrDocument matchOpenFile) { private List<FunctionInfo> getOpenFileFunctionList(SolrDocument matchOpenFile) {
try { try {
//解析文件的函数特征值 //解析文件的函数特征值
String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString(); String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString();
lineFeatureMd5s = lineFeatureMd5s.replace("\\", "") lineFeatureMd5s = lineFeatureMd5s
.replace("\\\\\\\"", "")
.replace("\\", "")
.replace("\"{", "{") .replace("\"{", "{")
.replace("}\"", "}"); .replace("}\"", "}");
return JSONArray.parseArray(lineFeatureMd5s, Function.class); return JSONArray.parseArray(lineFeatureMd5s, FunctionInfo.class);
} catch (Exception e) { } catch (Exception e) {
log.error("解析文件特征值失败", e); log.error("解析文件特征值失败", e);
} }
return new ArrayList<Function>(); return new ArrayList<>();
} }
/** /**

@ -2,6 +2,7 @@ package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.StrUtil; import cn.hutool.core.util.StrUtil;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
@ -170,6 +171,9 @@ public class LineAnalysisTask extends IAnalysisTask {
analysisFile.setOpenType(true); analysisFile.setOpenType(true);
} }
//保存当前文件开源行数
analysisFile.setOpenLineCount(matchLineRowsNum.size());
//保存当前文件的开源信息到mongo库中 //保存当前文件的开源信息到mongo库中
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32()) matchOpenFileMongo.setId(IdGenerator.uuid32())
@ -178,8 +182,8 @@ public class LineAnalysisTask extends IAnalysisTask {
.setFeatureSimilarity(featureSimilarity.floatValue()) .setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue()) .setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType()) .setOpenType(analysisFile.getOpenType())
.setMatchOpenFile(matchOpenFilesRes); .setAnalysisType(AnalysisLevelEnum.LINE_LEVEL.getCode())
log.info("文件" + analysisFile.getName() + ":开源率:" + openRate.floatValue() + ",特征相似度:" + featureSimilarity.floatValue()); .setSubMatchOpenFiles(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo); mongoTemplate.save(matchOpenFileMongo);
} }
@ -211,6 +215,12 @@ public class LineAnalysisTask extends IAnalysisTask {
String traitFileLineMd5 = fileAnalysisRes.getTraitFileLineMd5(); String traitFileLineMd5 = fileAnalysisRes.getTraitFileLineMd5();
List<String> lineFeatureList = Arrays.asList(traitFileLineMd5.split(",")); List<String> lineFeatureList = Arrays.asList(traitFileLineMd5.split(","));
//被测件文本内容
String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl());
//将文本内容解析成行信息,用于后续文件的开源率计算
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent);
for (SolrDocument openSourceFile : matchOpenFiles) { for (SolrDocument openSourceFile : matchOpenFiles) {
//开源文件MD5 //开源文件MD5
@ -239,7 +249,7 @@ public class LineAnalysisTask extends IAnalysisTask {
} }
//当前文件的开源率 //当前文件的开源率
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent); Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent);
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率 //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
@ -254,16 +264,21 @@ public class LineAnalysisTask extends IAnalysisTask {
continue; continue;
} }
String openFilePath = (String) openEntries.get("fullPath");
//组装当前开源文件的开源项目信息 //组装当前开源文件的开源项目信息
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId()) matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName()) .setPName(versionInfo.getProName())
.setSourceUrl((String) openEntries.get("fullPath")) .setPId(versionInfo.getProId())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setVersion(versionInfo.getVersionName()) .setVersion(versionInfo.getVersionName())
.setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl())
.setLicenseType(versionInfo.getLicenseType()) .setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()); .setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setMd5(openSourceFileMd5);
matchOpenFilesRes.add(matchOpenFileInfo); matchOpenFilesRes.add(matchOpenFileInfo);
} }
return matchOpenFilesRes; return matchOpenFilesRes;

@ -1,6 +1,7 @@
package com.keyware.composeanalysis.task; package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil; import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.io.FileUtil;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import com.keyware.common.constant.enums.AnalysisStatusEnum; import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.FixedValue; import com.keyware.composeanalysis.constant.FixedValue;
@ -14,14 +15,12 @@ import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.solr.VersionTreeNode; import com.keyware.composeanalysis.solr.VersionTreeNode;
import com.keyware.composeanalysis.util.AnalysisLogUtil; import com.keyware.composeanalysis.util.AnalysisLogUtil;
import com.keyware.composeanalysis.util.SolrUtils; import com.keyware.composeanalysis.util.SolrUtils;
import com.keyware.composeanalysis.util.SpringContextUtils;
import com.keyware.utils.IdGenerator; import com.keyware.utils.IdGenerator;
import com.mongodb.client.MongoClient; import com.mongodb.client.MongoClient;
import lombok.extern.log4j.Log4j2; import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.springframework.core.task.TaskExecutor;
import org.springframework.data.mongodb.core.MongoTemplate; import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query; import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.core.query.Update; import org.springframework.data.mongodb.core.query.Update;
@ -48,7 +47,6 @@ public class PorjectAnalysisTask {
private AnalysisTask analysisTask; private AnalysisTask analysisTask;
private AnalysisTaskServiceImpl analysisService; private AnalysisTaskServiceImpl analysisService;
private SolrUtils solrUtils; private SolrUtils solrUtils;
private TaskExecutor taskExecutor;
/** /**
* 项目级分析 * 项目级分析
@ -64,7 +62,6 @@ public class PorjectAnalysisTask {
this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
this.analysisTask = analysisTask; this.analysisTask = analysisTask;
this.solrUtils = solrUtils; this.solrUtils = solrUtils;
this.taskExecutor = SpringContextUtils.getBean(TaskExecutor.class);
} }
@ -123,8 +120,6 @@ public class PorjectAnalysisTask {
//当前文件开源信息存入数据库中 //当前文件开源信息存入数据库中
mongoTemplate.insert(projectAssembly); mongoTemplate.insert(projectAssembly);
analysisService.updateById(analysisTask);
//更新文件分析的状态 //更新文件分析的状态
mongoTemplate.update(FileDataMongoDto.class) mongoTemplate.update(FileDataMongoDto.class)
.matching(where("isDirectory").is(false)) .matching(where("isDirectory").is(false))
@ -196,12 +191,13 @@ public class PorjectAnalysisTask {
Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s); Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) { if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
//如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配 //如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配
updateFileAnalysisStatus(fileMd5s); updateFileAnalysisStatus(fileMd5s,true);
return; return;
} }
saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles); saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);
//直接更改没有匹配的文件分析状态 //直接更改没有匹配的文件分析状态,因为没有匹配上的文件,无法进行下一步的匹配(下一步的匹配只针对32种主流的语言)
updateFileAnalysisStatus(Sets.difference(fileMd5s, md5VersionIdMap.keySet())); Set<String> notMatchFileMd5s = Sets.difference(fileMd5s, md5VersionIdMap.keySet());
updateFileAnalysisStatus(notMatchFileMd5s,false);
} }
} }
@ -231,7 +227,7 @@ public class PorjectAnalysisTask {
if (CollectionUtils.isNotEmpty(batchInsertCache)) { if (CollectionUtils.isNotEmpty(batchInsertCache)) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
//更新文件分析的状态 //更新文件分析的状态
updateFileAnalysisStatus(md5VersionIdMap.keySet()); updateFileAnalysisStatus(md5VersionIdMap.keySet(),true);
} }
} }
@ -266,7 +262,7 @@ public class PorjectAnalysisTask {
} }
//更新文件分析的状态 //更新文件分析的状态
updateFileAnalysisStatus(matchedMd5s); updateFileAnalysisStatus(matchedMd5s,true);
} }
//获取匹配到的开源文件信息 //获取匹配到的开源文件信息
@ -274,14 +270,15 @@ public class PorjectAnalysisTask {
//设置匹配文件的信息 //设置匹配文件的信息
MatchOpenFile matchOpenFile = new MatchOpenFile(); MatchOpenFile matchOpenFile = new MatchOpenFile();
matchOpenFile.setId(IdGenerator.uuid32()) matchOpenFile.setId(IdGenerator.uuid32())
.setFileName(FileUtil.getName(openFilePath))
.setPName(versionInfo.getProName())
.setPId(versionInfo.getProId())
.setVersion(versionInfo.getVersionName())
.setVersionId(versionInfo.getVersionId()) .setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath) .setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl()) .setSourceUrl(versionInfo.getDownUrl())
.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setLicenseType(versionInfo.getLicenseType()) .setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()) .setMd5(originalFile.getMd5())
.setVersion(versionInfo.getVersionName())
.setFeatureSimilarity(100.00f) .setFeatureSimilarity(100.00f)
.setOpenRate(100.00f); .setOpenRate(100.00f);
@ -293,7 +290,8 @@ public class PorjectAnalysisTask {
.setOpenType(true) .setOpenType(true)
.setFeatureSimilarity(100.00f) .setFeatureSimilarity(100.00f)
.setOpenRate(100.00f) .setOpenRate(100.00f)
.setMatchOpenFile(Arrays.asList(matchOpenFile)); .setAnalysisType(AnalysisLevelEnum.FILE_LEVEL.getCode())
.setSubMatchOpenFiles(Arrays.asList(matchOpenFile));
return matchOpenFileInfo; return matchOpenFileInfo;
} }
@ -325,8 +323,8 @@ public class PorjectAnalysisTask {
break; break;
} }
//异步保存匹配的开源文件信息 //保存匹配的开源文件信息
taskExecutor.execute(() -> saveProjectOpenInfo(openProject, projectFiles)); saveProjectOpenInfo(openProject, projectFiles);
//获取开源项目的所有文件md5集合 //获取开源项目的所有文件md5集合
List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList()); List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList());
@ -339,8 +337,8 @@ public class PorjectAnalysisTask {
//计算与当前项目的相似度 //计算与当前项目的相似度
BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
//当相似度小于30%,不保存项目级的信息 //当相似度小于20%,不保存项目级的信息
if (semblance.compareTo(new BigDecimal(30)) < 0){ if (semblance.compareTo(new BigDecimal(20)) < 0){
break; break;
} }
@ -366,11 +364,11 @@ public class PorjectAnalysisTask {
} }
//更新文件分析的状态 //更新文件分析的状态
private void updateFileAnalysisStatus(Set<String> fileMd5Set) { private void updateFileAnalysisStatus(Set<String> fileMd5Set, Boolean openType) {
mongoTemplate.update(FileDataMongoDto.class) mongoTemplate.update(FileDataMongoDto.class)
.matching(where("md5").in(fileMd5Set)) .matching(where("md5").in(fileMd5Set))
.apply(new Update().set("openType", true) .apply(new Update().set("openType", openType)
.set("openRate", 100.00f) .set("openRate", openType ? 100.00f : 0f)
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())) .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
.all(); .all();
} }

@ -1,5 +1,6 @@
package com.keyware.composeanalysis.util; package com.keyware.composeanalysis.util;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Pair; import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.ArrayUtil; import cn.hutool.core.util.ArrayUtil;
import cn.hutool.core.util.ByteUtil; import cn.hutool.core.util.ByteUtil;
@ -54,65 +55,6 @@ public class SimilarityUtil {
} }
/**
* 获取开源率和开源行号
* @param analysisFile 被测件内容
* @param openSourceFile 开源文件内容
* @return
*/
// public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) {
// if (StrUtil.hasBlank(analysisFile,openSourceFile)){
// return new Pair<>(0.00f,new HashSet<>());
// }
// //匹配到的行号
// HashSet<Integer> matchedRowsNum = new HashSet<>();
//
// //被测件文件行
// List<String> analysisFileLineInfo = getSplitWords(analysisFile);
//
// //溯源到文件行
// HashSet<String> openSourceFileLineInfo = getSplitWords1(openSourceFile);
//
// for (int i = 0; i < analysisFileLineInfo.size(); i++) {
// String sent1Word = analysisFileLineInfo.get(i);
// if (openSourceFileLineInfo.contains(sent1Word)) {
// matchedRowsNum.add(i);
// }
// }
//
// //计算开源率
// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//
// return new Pair<>(openRate.toString(), matchedRowsNum);
// }
// public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(byte[] analysisFile, byte[] openSourceFile) {
// if (ArrayUtil.hasNull(analysisFile,openSourceFile)){
// return new Pair<>(0.00f,new HashSet<>());
// }
// //匹配到的行号
// HashSet<Integer> matchedRowsNum = new HashSet<>();
//
// //被测件文件行
// List<String> analysisFileLineInfo = getSplitWords(new String(analysisFile));
//
// //溯源到文件行
// HashSet<String> openSourceFileLineInfo = getSplitWords1(new String(openSourceFile));
//
// for (int i = 0; i < analysisFileLineInfo.size(); i++) {
// String sent1Word = analysisFileLineInfo.get(i);
// if (openSourceFileLineInfo.contains(sent1Word)) {
// matchedRowsNum.add(i);
// }
// }
//
// //计算开源率
// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//
// return new Pair<>(openRate.toString(), matchedRowsNum);
// }
public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) { public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) {
if (StrUtil.hasBlank(analysisFile,openSourceFile)){ if (StrUtil.hasBlank(analysisFile,openSourceFile)){
return new Pair<>(0.00f,new HashSet<>()); return new Pair<>(0.00f,new HashSet<>());
@ -140,6 +82,35 @@ public class SimilarityUtil {
} }
/**
 * Computes how much of the file under analysis also appears in one open-source file.
 *
 * <p>Each entry of {@code analysisFileLineInfo} is looked up in the set produced by
 * {@code getSplitWords1(openSourceFile)}; every entry found there counts as a match.
 *
 * @param analysisFileLineInfo entries (lines) of the file under analysis, already split by the caller
 * @param openSourceFile       text content of the open-source file to compare against
 * @return a pair whose key is the match rate as a percentage (matched entries / total entries * 100)
 *         and whose value holds the zero-based indexes of the matched entries;
 *         {@code (0.00f, empty set)} when the list is empty or the open-source text is blank
 */
public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(List<String> analysisFileLineInfo , String openSourceFile) {
    // Nothing to compare: report a zero rate with no matched rows.
    boolean nothingToCompare = CollUtil.isEmpty(analysisFileLineInfo) || StrUtil.isBlank(openSourceFile);
    if (nothingToCompare) {
        return new Pair<>(0.00f, new HashSet<>());
    }

    // Entries of the open-source file, kept in a set for constant-time lookups.
    HashSet<String> openSourceLines = getSplitWords1(openSourceFile);

    // Zero-based indexes of the analysed entries that also occur in the open-source file.
    HashSet<Integer> hitRows = new HashSet<>();
    int rowIndex = 0;
    for (String analysisLine : analysisFileLineInfo) {
        if (openSourceLines.contains(analysisLine)) {
            hitRows.add(rowIndex);
        }
        rowIndex++;
    }

    // Rate = matched / total, rounded to 4 decimals, then expressed as a percentage.
    BigDecimal hitCount = new BigDecimal(hitRows.size());
    BigDecimal totalCount = new BigDecimal(analysisFileLineInfo.size());
    BigDecimal ratio = hitCount.divide(totalCount, 4, RoundingMode.HALF_UP);
    // The scale-4 ratio times 100 always ends in two zero digits, so dropping to
    // scale 2 never needs rounding (setScale without a rounding mode is safe here).
    BigDecimal percentage = ratio.multiply(new BigDecimal(100)).setScale(2);

    return new Pair<>(percentage.floatValue(), hitRows);
}
/** /**
* 获得两个文件的相似度,并将被匹配的行 * 获得两个文件的相似度,并将被匹配的行
* @param matchLineInfos 被匹配的行信息 * @param matchLineInfos 被匹配的行信息

@ -1,12 +1,14 @@
spring: spring:
application:
name: compose-analysis
cloud: cloud:
nacos: nacos:
discovery: discovery:
server-addr: 172.16.36.7:8848 server-addr: 127.0.0.1:8848
namespace: 2fad0ca9-bc32-4afd-9f2e-ebc133d5e781 namespace: 4ce70f33-8b88-4931-a88c-2b68e7259bd7
config: config:
server-addr: 172.16.36.7:8848 server-addr: 127.0.0.1:8848
namespace: 2fad0ca9-bc32-4afd-9f2e-ebc133d5e781 namespace: 4ce70f33-8b88-4931-a88c-2b68e7259bd7
file-extension: yaml file-extension: yaml
config: config:
import: nacos:compose-analysis-dev.yaml import: nacos:compose-analysis-dev.yaml

Loading…
Cancel
Save