matchFilesMd5;
+
+ //与开源项目相似度 matchFileNum / projectFileNum
+ private Double similarity;
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java
new file mode 100644
index 0000000..de8894c
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java
@@ -0,0 +1,75 @@
+package com.keyware.composeanalysis.mongo;
+
+import lombok.Data;
+import lombok.experimental.Accessors;
+import org.springframework.data.annotation.Id;
+import org.springframework.data.mongodb.core.mapping.Document;
+
+import java.util.List;
+
+
+/**
+ * 此类映射到名为 "project_assembly" 的 MongoDB 集合。
+ *
+ * 当前项目匹配到的开源项目的信息
+ *
+ * @author liuzongren
+ * @date 2024/7/9
+ */
+@Document(collection = "project_assembly")
+@Data
+@Accessors(chain = true)
+public class ProjectAssemblyMongoDto {
+
+ /**
+ * MongoDB 文档的唯一标识符。
+ */
+ @Id
+ private String id;
+
+ /**
+ * 项目的文件数量
+ */
+ private Integer fileCount;
+
+ /**
+ * 匹配到的开源文件的数量
+ */
+ private Integer matchFileCount;
+
+ /**
+ * 匹配到的开源项目版本Id
+ */
+ private String versionId;
+
+ /**
+ * 匹配到的开源项目版本名称
+ */
+ private String versionName;
+
+ /**
+ * 匹配到的开源项目在开源网站的项目序号
+ */
+ private String projectId;
+
+ /**
+ * 匹配到的开源项目的名称。
+ */
+ private String projectName;
+
+ /**
+ * 开源项目的地址
+ */
+ private String openSourceUrl;
+
+ /**
+ * 被测件和当前项目的整体相似度
+ */
+ private Double semblance;
+
+ /**
+ * 当前开源项目的开源协议
+ */
+ private List licenseType;
+
+}
\ No newline at end of file
diff --git a/src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java
new file mode 100644
index 0000000..d20c0df
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java
@@ -0,0 +1,75 @@
+package com.keyware.composeanalysis.mongo;
+
+import lombok.Data;
+import org.springframework.data.annotation.Id;
+import org.springframework.data.mongodb.core.mapping.Document;
+import org.springframework.data.mongodb.core.mapping.Field;
+
+import java.io.Serializable;
+
+/**
+ *
+ * 项目基本信息
+ *
+ *
+ * @author liuzongren
+ * @since 2024-07-23
+ */
+@Data
+@Document(collection = "PROJECTBASEDATA")
+public class ProjectBaseDataMongoDto implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ @Id
+ private String id;
+
+ /**
+ * 项目id
+ */
+ @Field("ID")
+ private String projectId;
+
+ /**
+ * 项目名称
+ */
+ @Field("NAME")
+ private String name;
+
+ /**
+ * 项目类型
+ */
+ @Field("TYPE")
+ private String type;
+
+ /**
+ * 当前开源项目被stars 的次数
+ */
+ @Field("STARS")
+ private String stars;
+
+ /**
+ * 项目描述
+ */
+ @Field("DESCRIBE")
+ private String describe;
+
+ /**
+ * 项目URL
+ */
+ @Field("URL")
+ private String url;
+
+ /**
+ * 许可类型
+ */
+ @Field("LICENSETYPE")
+ private String licenseType;
+
+ /**
+ * 创建时间
+ */
+ @Field("CREATE_TIME")
+ private String createTime;
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java
new file mode 100644
index 0000000..4a12050
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java
@@ -0,0 +1,75 @@
+package com.keyware.composeanalysis.mongo;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import org.springframework.data.annotation.Id;
+import org.springframework.data.mongodb.core.mapping.Document;
+import org.springframework.data.mongodb.core.mapping.Field;
+
+import java.io.Serializable;
+
+/**
+ *
+ * 項目的版本信息
+ *
+ *
+ * @author liuzongren
+ * @since 2024-07-23
+ */
+@Data
+@EqualsAndHashCode(callSuper = false)
+@Document(collection = "VERSIONBASEDATA")
+public class VersionbasedataMongoDto implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * 主键
+ */
+ @Id
+ private String id;
+
+ /**
+ * 版本ID
+ */
+ @Field("ID")
+ private String versionId;
+
+ /**
+ * pid 项目ID
+ */
+ @Field("PID")
+ private String projectId;
+
+ /**
+ * 版本名称
+ */
+ @Field("NAME")
+ private String versionName;
+
+ /**
+ * 版本下载地址
+ */
+ @Field("DOWNURL")
+ private String downloadUrl;
+
+ /**
+ * 項目的相对路径
+ */
+ @Field("PATH")
+ private String path;
+
+ /**
+ * 创建时间
+ */
+ @Field("CREATE_TIME")
+ private String createTime;
+
+ /**
+ * 版本描述
+ */
+ @Field("DESCRIBE")
+ private String description;
+
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java b/src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java
new file mode 100644
index 0000000..8489aa4
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java
@@ -0,0 +1,68 @@
+//package com.keyware.composeanalysis.schedule;
+//
+//import cn.hutool.core.date.DateUnit;
+//import cn.hutool.core.date.DateUtil;
+//import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+//import com.keyware.common.constant.enums.AnalysisStatusEnum;
+//import com.keyware.composeanalysis.constant.MongoDBConst;
+//import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+//import com.keyware.composeanalysis.entity.AnalysisTask;
+//import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+//import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl;
+//import com.keyware.composeanalysis.util.AnalysisLogUtil;
+//import com.mongodb.client.MongoClient;
+//import jakarta.annotation.Resource;
+//import lombok.extern.log4j.Log4j2;
+//import org.springframework.context.annotation.Configuration;
+//import org.springframework.data.mongodb.core.MongoTemplate;
+//import org.springframework.data.mongodb.core.query.Query;
+//import org.springframework.scheduling.annotation.EnableScheduling;
+//import org.springframework.scheduling.annotation.Scheduled;
+//
+//import java.util.List;
+//
+//import static org.springframework.data.mongodb.core.query.Criteria.where;
+//
+///**
+// * 定时检测 分析任务是否完成
+// */
+//@Log4j2
+//@EnableScheduling
+//@Configuration
+//public class AnalysisStatusSchedule {
+//
+// @Resource
+// private AnalysisTaskServiceImpl taskService;
+//
+// @Resource
+// private MongoClient mongoClient;
+//
+// /**
+// * 定时查询任务库 ,看是否存在已经分析完成的任务,如果存在 变更任务的状态
+// */
+//    @Scheduled(cron = "*/1 * * * * ?") // 每秒钟执行一次
+// public void startTask() {
+// //查询正在进行成分分析的任务
+// LambdaQueryWrapper taskQueryWrapper = new LambdaQueryWrapper<>();
+// taskQueryWrapper.eq(AnalysisTask::getAnalysisStatus, AnalysisStatusEnum.ANALYSISING.getCode());
+// taskQueryWrapper.eq(AnalysisTask::getDecompressionFlag,true);
+// taskQueryWrapper.eq(AnalysisTask::getComposeFlag,AnalysisStatusEnum.ANALYSISING.getCode());
+// List composeAnalysisTasks = taskService.list(taskQueryWrapper);
+//
+// //循环遍历任务状态
+// for (AnalysisTask composeTask : composeAnalysisTasks) {
+// MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX+composeTask.getId());
+//
+// Query fileQuery = new Query(where("isDirectory").is(false)
+// .and("fileAnalysisStatus").in(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode(),FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()));
+// Long finishedAnalysisFileCount = mongoTemplate.count(fileQuery, FileDataMongoDto.class);
+//
+// //所有文件分析完毕,将成分分析的状态 更改为已完成
+// if (finishedAnalysisFileCount.intValue() == composeTask.getFileCount()){
+// composeTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
+// taskService.updateById(composeTask);
+// AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ DateUtil.between(composeTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒");
+// }
+// }
+// }
+//}
diff --git a/src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java b/src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java
new file mode 100644
index 0000000..55d40b1
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java
@@ -0,0 +1,56 @@
+package com.keyware.composeanalysis.service;
+
+import com.baomidou.mybatisplus.extension.service.IService;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.response.AnalysisResp;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PathVariable;
+import org.springframework.web.bind.annotation.PostMapping;
+
+/**
+ *
+ * 服务类
+ *
+ *
+ * @author liuzongren
+ * @since 2024-07-23
+ */
+public interface AnalysisTaskService extends IService {
+
+ /**
+ * 执行成分分析
+ *
+ * @param analysisTask
+ */
+ void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException;
+
+ /**
+ * 停止或暂停分析任务
+ *
+ * @param taskId 任务id
+     * 通过 Redis 标志位通知运行中的任务停止(无返回值)
+ * author liuzongren
+ */
+ void stopComposeAnalysisTask(String taskId);
+
+ /**
+ * 重新分析任务,
+ *
+ * @param taskId 任务id
+     * @return Boolean 是否成功触发重新分析
+ * author liuzongren
+ */
+ Boolean restartComposeAnalysisTask(String taskId);
+
+
+ /**
+ * 恢复分析任务
+ *
+ * @param analysisTask 任务
+     * 异步执行,无返回值
+ * author liuzongren
+ */
+ void recoveryComposeAnalysisTask(AnalysisTask analysisTask);
+
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java b/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java
new file mode 100644
index 0000000..6528706
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java
@@ -0,0 +1,215 @@
+package com.keyware.composeanalysis.service.impl;
+
+import cn.hutool.core.date.DateUnit;
+import cn.hutool.core.date.DateUtil;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import com.keyware.common.constant.RedisConst;
+import com.keyware.common.constant.enums.AnalysisStatusEnum;
+import com.keyware.composeanalysis.constant.MongoDBConst;
+import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mapper.AnalyzeTaskMapper;
+import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
+import com.keyware.composeanalysis.mongo.ProjectAssemblyMongoDto;
+import com.keyware.composeanalysis.service.AnalysisTaskService;
+import com.keyware.composeanalysis.task.*;
+import com.keyware.composeanalysis.util.AnalysisLogUtil;
+import com.keyware.composeanalysis.util.RedisUtil;
+import com.keyware.composeanalysis.util.SolrUtils;
+import com.mongodb.client.MongoClient;
+import jakarta.annotation.Resource;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.collections.CollectionUtils;
+import org.springframework.core.task.TaskExecutor;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.data.mongodb.core.query.Update;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Service;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+
+import static org.springframework.data.mongodb.core.query.Criteria.where;
+
+
+/**
+ *
+ * 成分分析服务实现类
+ *
+ *
+ * @author liuzongren
+ * @since 2024-07-23
+ */
+@Log4j2
+@Service
+public class AnalysisTaskServiceImpl extends ServiceImpl implements AnalysisTaskService {
+
+ @Resource
+ private MongoClient mongoClient;
+
+ @Resource
+ private SolrUtils solrUtils;
+
+ @Resource
+ private TaskExecutor taskExecutor;
+
+ @Resource
+ private RedisUtil redisUtil;
+
+ @Override
+ @Async
+ public void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException {
+ long startTime = System.currentTimeMillis();
+ log.info("开始成份分析,taskName:{}",analysisTask.getFileName());
+ //校验文件压缩是否完成
+ retryGetDecompressionFlag(analysisTask);
+
+ //开始分析前,将成分分析的状态为 进行中
+ analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSISING.getCode());
+ this.updateById(analysisTask);
+ MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
+ AnalysisLogUtil.insert(mongoTemplate, "【成分分析】开始:" + analysisTask.getFileName());
+
+ //首先进行项目级别的分析,将所有文件的源MD5批量去solr库中匹配
+ PorjectAnalysisTask projectAnalysisTask = new PorjectAnalysisTask(mongoClient, analysisTask, solrUtils, this);
+ projectAnalysisTask.doAnalysis();
+
+ //项目级的分析完成后,没有匹配中的文件,根据分析的级别,对每个文件进行相应级别的分析
+ analysisFile(mongoTemplate,analysisTask);
+
+ //成份分析完成后,查询所有开源文件,判断当前项目是否开源
+ checkProjectIfOpen(mongoTemplate,analysisTask);
+
+ //修改成分分析状态为完成
+ analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
+ this.updateById(analysisTask);
+
+ //插入分析日志
+ AnalysisLogUtil.insert(mongoTemplate,"【成分分析】已完成,耗时:"+ DateUtil.between(analysisTask.getAnalysisStartTime(),DateUtil.date(), DateUnit.SECOND) +"秒");
+ log.info("成份分析完成,taskName:{},耗时:{}",analysisTask.getFileName(),(System.currentTimeMillis()-startTime)/1000 +"秒");
+ }
+
+ @Override
+ public void stopComposeAnalysisTask(String taskId) {
+ //将成分分析的任务状态的标志位置为暂停,让线程池中的排队的任务队列停止分析
+ redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, taskId), AnalysisStatusEnum.ANALYSIS_PAUSED.getCode());
+ }
+
+ @Override
+ public Boolean restartComposeAnalysisTask(String taskId) {
+ boolean result = false;
+ try {
+ //删除匹配的开源项目信息
+ MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + taskId);
+ mongoTemplate.remove(ProjectAssemblyMongoDto.class);
+
+ //删除项目匹配的开源文件
+ mongoTemplate.remove(MatchOpenFileMongoDto.class);
+
+ //将文件分析状态设置为未开始分析
+ mongoTemplate.update(FileDataMongoDto.class)
+ .apply(new Update().set("openType", false)
+ .set("fileAnalysisStatus", FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode()))
+ .all();
+
+ //重新开始分析任务
+ doComposeAnalyze(getById(taskId));
+ result = true;
+ } catch (Exception e) {
+ log.error("重新分析失败", e);
+ }
+ return result;
+ }
+
+ @Override
+ @Async
+ public void recoveryComposeAnalysisTask(AnalysisTask analysisTask) {
+ /**
+ * todo 这里存在一个逻辑缺陷
+     * 项目级别的分析是无法终止的,当前任务恢复的是文件级的成分分析,如果文件级的没有分析完成,这里可能会将所有文件进行文件级别的分析
+ */
+ try {
+ //将成分分析的任务状态的标志位置改为进行中
+ redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()), AnalysisStatusEnum.ANALYSISING.getCode());
+
+ MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
+ //项目级的分析完成后
+ Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").ne(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode())
+ .and("isDirectory").is(false));
+ List unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class);
+
+ if (CollectionUtils.isNotEmpty(unAnalyzedFiles)){
+ //使用线程池 并行的分析文件
+ CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size());
+ unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> {
+ IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch);
+ taskExecutor.execute(task);
+ });
+ countDownLatch.await();
+ //修改成分分析状态为完成
+ analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
+ this.updateById(analysisTask);
+ AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ DateUtil.between(analysisTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒");
+ }
+ } catch (Exception e) {
+ log.error("恢复分析失败", e);
+ }
+ }
+
+
+    //引入解压缩有可能会很慢,这里添加重试机制,最多重试60次,共等待约60s
+ private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) {
+ int retryCount = 0;
+ while (retryCount < 60) {
+ AnalysisTask latestAnalysisTask = this.getById(analysisTask.getId());
+ if (latestAnalysisTask.getDecompressionFlag()) {
+ analysisTask.setDecompressionFlag(true);
+ analysisTask.setFileCount(latestAnalysisTask.getFileCount());
+ return true;
+ }
+ try {
+ Thread.sleep(1000);
+ } catch (InterruptedException e) {
+ log.error("线程休眠异常", e);
+ }
+ retryCount++;
+ }
+ return false;
+ }
+
+
+ //开启单个文件的分析
+ private void analysisFile(MongoTemplate mongoTemplate,AnalysisTask analysisTask) throws InterruptedException {
+ Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").is(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode())
+ .and("isDirectory").is(false));
+ List unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class);
+
+ //使用线程池 并行的分析
+ CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size());
+ unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> {
+ IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch);
+ taskExecutor.execute(task);
+ });
+ countDownLatch.await();
+ }
+
+ //校验当前项目是否开源
+ private void checkProjectIfOpen(MongoTemplate mongoTemplate,AnalysisTask analysisTask){
+ Query openFileQuery = new Query(where("openType").is(true));
+ Long openFilesCount = mongoTemplate.count(openFileQuery, FileDataMongoDto.class);
+ //是否开源阈值
+ Integer openThread = analysisTask.getOpenRateThreshold();
+ BigDecimal totalFileCount = new BigDecimal(analysisTask.getFileCount());
+ //统计开源率
+ BigDecimal openRate = new BigDecimal(openFilesCount).divide(totalFileCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
+ //超过阈值认为开源
+ if (openRate.compareTo(new BigDecimal(openThread)) >= 0) {
+ analysisTask.setOpenType(true);
+ }
+ }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/solr/VersionTree.java b/src/main/java/com/keyware/composeanalysis/solr/VersionTree.java
new file mode 100644
index 0000000..669292a
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/solr/VersionTree.java
@@ -0,0 +1,56 @@
+package com.keyware.composeanalysis.solr;
+
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/26
+ * @Description solr 库 VersionTree树信息 ,保存项目的整体信息
+ */
+@Data
+@Accessors(chain = true)
+public class VersionTree {
+
+ /**
+ * 项目ID
+ */
+ private String proId;
+
+ /**
+ * 项目名称
+ */
+ private String proName;
+
+ /**
+ * 项目编号
+ */
+ private String versionId;
+
+ /**
+ * 项目版本
+ */
+ private String versionName;
+
+ /**
+ * 项目地址
+ */
+ private String downUrl;
+
+ public void setLicenseType(String licenseType) {
+ if (licenseType != null){
+ this.licenseType = Arrays.asList(licenseType.split("@@@"));
+ }
+ }
+
+ private List licenseType;
+
+ /**
+ * 项目所有文件的信息
+ */
+ private List dirTree;
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java b/src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java
new file mode 100644
index 0000000..506e637
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java
@@ -0,0 +1,42 @@
+package com.keyware.composeanalysis.solr;
+
+import lombok.Data;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/26
+ */
+@Data
+public class VersionTreeNode {
+
+ /**
+ * 节点编号
+ */
+ private String id;
+
+ /**
+ * 父节点编号
+ */
+ private String pid;
+
+ /**
+ * 文件名称
+ */
+ private String name;
+
+ /**
+ * 是否是父节点
+ */
+ private Boolean isParent;
+
+ /**
+ * 文件的MD5
+ */
+ private String sourceFileMd5;
+
+ /**
+ * 文件在项目中的相对路径
+ */
+ private String fullPath;
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java b/src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java
new file mode 100644
index 0000000..de10f4a
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java
@@ -0,0 +1,45 @@
+package com.keyware.composeanalysis.task;
+
+import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+import org.springframework.data.mongodb.core.MongoTemplate;
+
+import java.util.concurrent.CountDownLatch;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/31
+ * @description
+ */
+public class AnalysisTaskFactory {
+
+ /**
+ * 根据分析类型,创建具体的分析任务
+ * @param analysisTask
+ * @param analysisFile
+ * @param mongoTemplate
+ * @param countDownLatch 任务总数控制器
+ * @return
+ */
+
+ public static IAnalysisTask createAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
+ AnalysisLevelEnum analysisLevel = AnalysisLevelEnum.getAnalysisLevelEnum(analysisTask.getAnalysisLevel());
+ switch (analysisLevel) {
+ case FILE_LEVEL:
+ return new FileAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
+ case FUNCTION_LEVEL:
+ return new FunctionAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
+ case BLOCK_LEVEL:
+ return new CodeBlockAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
+ case LINE_LEVEL:
+ return new LineAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
+ default:
+ break;
+ }
+ return null;
+ }
+
+
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
new file mode 100644
index 0000000..5a20742
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
@@ -0,0 +1,356 @@
+package com.keyware.composeanalysis.task;
+
+
+import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.lang.Pair;
+import com.alibaba.fastjson.JSONArray;
+import com.keyware.composeanalysis.constant.FixedValue;
+import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst;
+import com.keyware.composeanalysis.constant.RedisConst;
+import com.keyware.composeanalysis.constant.SolrDBConst;
+import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
+import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
+import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+import com.keyware.composeanalysis.mongo.LineDataMongoDto;
+import com.keyware.composeanalysis.mongo.MatchOpenFile;
+import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
+import com.keyware.composeanalysis.solr.VersionTree;
+import com.keyware.composeanalysis.util.*;
+import com.keyware.keyswan.anaysis.Analysis;
+import com.keyware.keyswan.anaysis.AnalysisFactory;
+import com.keyware.keyswan.common.CodeFile;
+import com.keyware.keyswan.common.LineModel;
+import com.keyware.utils.IdGenerator;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.query.Update;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.*;
+import java.util.concurrent.CountDownLatch;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static com.keyware.composeanalysis.util.SimilarityUtil.getOpenRateAndSaveRowNum;
+import static org.springframework.data.mongodb.core.query.Criteria.where;
+
+/**
+ * @author liuzongren
+ * @ClassName LineAnalysisTask
+ * @description: 代码块级别溯源 任务
+ * @datetime 2024年 07月 25日 16:19
+ * @version: 1.0
+ */
+
+@Log4j2
+public class CodeBlockAnalysisTask extends IAnalysisTask {
+
+ private MongoTemplate mongoTemplate;
+ private AnalysisTask analysisTask;
+ //被测件的文件信息
+ private FileDataMongoDto analysisFile;
+
+ private SolrUtils solrUtils;
+
+ private RedisUtil redisUtil;
+
+ private CountDownLatch countDownLatch;
+
+ public CodeBlockAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
+ this.mongoTemplate = mongoTemplate;
+ this.analysisTask = analysisTask;
+ this.analysisFile = analysisFile;
+ this.countDownLatch = countDownLatch;
+ this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
+ this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
+ }
+
+ /**
+ * 方法 或者代码块 级别 源代码溯源
+ * 当前任务 需要在 文件级分析完成后 进行
+ */
+
+ @Override
+ public void run() {
+ //执行任务前,判断一下任务执行的状态
+ Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
+ if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
+ log.info("任务已取消,fileName:{}", analysisFile.getName());
+ countDownLatch.countDown();
+ return;
+ }
+
+ //获取文件地址
+ String filePath = analysisFile.getFileUrl();
+ //获取文件名称
+ String fileName = analysisFile.getName();
+
+ try {
+ LineDataMongoDto lineDataMongoDto = new LineDataMongoDto();
+ lineDataMongoDto.setFileId(analysisFile.getId());
+ Analysis analysis = AnalysisFactory.getAnalysis(filePath);
+ //将代码块特征存入MongoDB
+ //提取文件的代码块信息
+ CodeFile codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT_BY_6_LINE);
+ List lineFeatures = codeFile.getLine_hay();
+
+ //根据文件后缀判断需要查询的solr特征库库名称
+ String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
+
+ //从solr库中获取特征相似的文件
+ SolrDocumentList matchOpenSourceFiles = getFeatureSimilarityFromSolr(featureCoreName, lineFeatures);
+
+ //计算开源率
+ doAnalysis(matchOpenSourceFiles, codeFile);
+
+            //更新文件的分析状态为已完成(代码块级特征已分析完毕)
+ analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .replaceWith(analysisFile)
+ .findAndReplace();
+
+ AnalysisLogUtil.insert(mongoTemplate, "【代码块级分析】完成" + fileName);
+ log.info("文件" + fileName + ":代码块级分析完成");
+ } catch (Exception e) {
+ AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【代码块分析】失败" + fileName, e);
+ log.error("文件:" + fileName + "代码块级分析失败!", e);
+            //修改当前文件分析状态为失败
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
+ .first();
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+
+
+ /**
+ * 根据 特征值 从特征库中检索 具有特征相似的
+ *
+ * @param solrCoreName 检索的solr 库名称
+ * @param functionAndCodeBlockInfos
+ * @return
+ */
+ private SolrDocumentList getFeatureSimilarityFromSolr(String solrCoreName, List functionAndCodeBlockInfos) {
+ //获取函数获取代码块的特征MD5值
+ Set traitLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getTraitLineMd5).collect(Collectors.toSet());
+ Set cuttLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getCutLineMd5).collect(Collectors.toSet());
+ Set queryMd5Arr = Stream.concat(traitLineMd5Arr.stream(), cuttLineMd5Arr.stream()).collect(Collectors.toSet());
+ String queryStr = "line_hay:(" + StringUtils.join(queryMd5Arr, " OR ") + ")";
+ log.info("查询条件: solrCoreName:{},queryStr:{}", solrCoreName, queryStr);
+ SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,line_hay");
+ log.info("查询结果: result:{}", result);
+ return result;
+ }
+
+
+ /**
+ * 计算开源率 被测件的开源率
+ *
+ * @param matcheOpenSourceFiles 匹配的开源文件信息
+ * @param fileAnalysisRes 被测件的解析结果
+ */
+ private void doAnalysis(SolrDocumentList matcheOpenSourceFiles, CodeFile fileAnalysisRes) {
+
+ if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) {
+ return;
+ }
+
+ //根据文件后缀判断需要查询的文件版本库名称
+ String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
+
+
+ //保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数
+ Set matchingTraitLineSet = new HashSet<>();
+
+ //匹配的特征代码块MD5
+ Set matchedLineRowsNum = new HashSet<>();
+
+ //统计每个文件的开源率
+ List matchOpenFilesRes = calculateSimilarityAndOpenRate(matcheOpenSourceFiles, fileAnalysisRes, sourceFileBaseCoreName, matchedLineRowsNum, matchingTraitLineSet);
+
+ //计算文件的总体的特征相似度
+ Map traitMd5Map = fileAnalysisRes.getLine_hay().stream().collect(Collectors.toMap(LineModel::getTraitLineMd5, java.util.function.Function.identity()));
+
+ int matchCodeBlockLineCount = 0;
+ for (String matchFeatureFunctionMd5 : matchingTraitLineSet) {
+ LineModel lineModel = traitMd5Map.get(matchFeatureFunctionMd5);
+ matchCodeBlockLineCount += (Integer.valueOf(lineModel.getEndLine()) - Integer.valueOf(lineModel.getStartLine()));
+ }
+
+ BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ //计算文件的总体开源率
+ BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ //获取开源率的阈值
+ Integer openRateThreshold = analysisTask.getOpenRateThreshold();
+
+ //如果开源率大于阈值,则将当前文件设置成开源
+ if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
+ analysisFile.setOpenType(true);
+ }
+
+ //保存当前文件的开源信息到mongo库中
+ MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
+ matchOpenFileMongo.setId(IdGenerator.uuid32())
+ .setFilePath(analysisFile.getFileUrl())
+ .setFileName(analysisFile.getName())
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRate.floatValue())
+ .setOpenType(analysisFile.getOpenType())
+ .setMatchOpenFile(matchOpenFilesRes);
+
+ mongoTemplate.save(matchOpenFileMongo);
+ }
+
+
+ /**
+ * 计算当前文件的特征相似度 和 开源率
+ *
+ * @param matchOpenFiles 通过MD5 匹配到的所有开源文件
+ * @param sourceFileBaseCoreName 当前文件特征文件的 solr coreName
+ * @param matchLineRowsNum 所有开源文件匹配到的开源行号列表
+ * @param matchFeatureCodeBlockMd5s 所有开源文件匹配到的特征代码块MD5
+ */
+ private List calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set matchLineRowsNum, Set matchFeatureCodeBlockMd5s) {
+
+ List matchOpenFilesRes = new ArrayList<>();
+
+ //首先根据文件的MD5查询开源文件的版本ID,和路径信息
+ Set openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet());
+ Map md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s);
+
+ //根据版本ID查询版本的详细信息
+        //todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versionTree 后面需要优化
+ Set openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
+ List versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
+ Map versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));
+
+
+ for (SolrDocument matchFile : matchOpenFiles) {
+
+ //开源文件md5
+ String openSourceFileMd5 = matchFile.getFieldValue("sourceMd5").toString();
+
+ //解析文件的代码块特征值
+ List openFileCodeBlockFeatureList = getOpenFileCodeBlockList(matchFile);
+
+ //匹配的总特征行数
+ int currentFileMatchFeatureLineCount = 0;
+
+ //遍历当前文件的代码块特征,统计匹配的总行数
+ for (LineModel lineModel : fileAnalysisRes.getLine_hay()) {
+ String traitLineMd5 = lineModel.getTraitLineMd5();
+                //遍历匹配到的开源文件的代码块特征行信息,查找相同的特征MD5
+ for (LineModel matchLine : openFileCodeBlockFeatureList) {
+ if (traitLineMd5.equals(matchLine.getTraitLineMd5())) {
+ //计算匹配的特征行数
+ currentFileMatchFeatureLineCount += (Integer.valueOf(matchLine.getEndLine()) - Integer.valueOf(matchLine.getStartLine()) + 1);
+ matchFeatureCodeBlockMd5s.add(traitLineMd5);
+ }
+ }
+ }
+
+
+ //根据源文件的MD5确定需要查询源码库的序号
+ String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
+
+ //获取开源文件的文本信息
+ SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent");
+
+ //当前文件的开源率
+ Pair> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent.getFieldValue("sourceContent").toString());
+
+ //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
+ matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
+
+ //统计当前文件的特征相似度
+ BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(fileAnalysisRes.getCodeRowNum(), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
+ VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
+
+ //组装当前开源文件的开源项目信息
+ MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
+ matchOpenFileInfo.setPId(versionInfo.getProId())
+ .setPName(versionInfo.getProName())
+ .setSourceUrl((String) openEntries.get("fullPath"))
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setVersion(versionInfo.getVersionName())
+ .setLicenseType(versionInfo.getLicenseType())
+ .setAnalyzeType(AnalysisLevelEnum.BLOCK_LEVEL.getCode());
+ matchOpenFilesRes.add(matchOpenFileInfo);
+ }
+ return matchOpenFilesRes;
+ }
+
+
+ /**
+ * 获取当前文件的代码块特征值
+ *
+ * @param openSourceFile
+ * @return
+ */
+ private List getOpenFileCodeBlockList(SolrDocument openSourceFile) {
+ //解析文件的代码块特征值
+ String lineFeatureMd5s = (String) openSourceFile.get("line_hay");
+ lineFeatureMd5s = lineFeatureMd5s.replace("\\", "")
+ .replace("\"{", "{")
+ .replace("}\"", "}");
+ return JSONArray.parseArray(lineFeatureMd5s, LineModel.class);
+ }
+
+
+ /**
+ * 将特征值插入到mongo库中
+ *
+ * @param features 特征集合
+ * @param lineDataMongoDto 当前分析任务 ,特征信息存储
+ * todo 后期 看看有没有插入的必要
+ * @param
+ */
+ @Deprecated
+ private void insertFeatureValue(List features, LineDataMongoDto lineDataMongoDto) {
+ List batchInsertList = new ArrayList<>();
+ if (CollectionUtil.isNotEmpty(features)) {
+ //这里的批量插入逻辑可以进行校验
+ //每10条存一次,解析的数据量如果过大,可能会超过MongoDB数据限制
+ int batchInsertStpe = 10;
+ int total = 0;
+ for (int i = 0; i < features.size(); i++) {
+ LineModel lineModel = features.get(i);
+ if (total != batchInsertStpe) {
+ batchInsertList.add(lineModel);
+ total++;
+ }
+ if (i == features.size() - 1 && total != batchInsertStpe) {
+ total = 0;
+ lineDataMongoDto.setId(IdGenerator.uuid32())
+ .setLineModels(batchInsertList);
+ mongoTemplate.insert(lineDataMongoDto);
+ }
+ if (total == batchInsertStpe) {
+ total = 0;
+ lineDataMongoDto.setId(IdGenerator.uuid32())
+ .setLineModels(batchInsertList);
+ mongoTemplate.insert(lineDataMongoDto);
+ batchInsertList.clear();
+ }
+ }
+ } else {
+ lineDataMongoDto.setId(IdGenerator.uuid32());
+ mongoTemplate.insert(lineDataMongoDto);
+ }
+ }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
new file mode 100644
index 0000000..3a0aa4c
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
@@ -0,0 +1,232 @@
+package com.keyware.composeanalysis.task;
+
+import com.keyware.composeanalysis.constant.FixedValue;
+import com.keyware.composeanalysis.constant.RedisConst;
+import com.keyware.composeanalysis.constant.SolrDBConst;
+import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
+import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
+import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+import com.keyware.composeanalysis.mongo.MatchOpenFile;
+import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
+import com.keyware.composeanalysis.solr.VersionTree;
+import com.keyware.composeanalysis.util.*;
+import com.keyware.keyswan.anaysis.Analysis;
+import com.keyware.keyswan.anaysis.AnalysisFactory;
+import com.keyware.keyswan.common.CodeFile;
+import com.keyware.utils.IdGenerator;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.query.Update;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.*;
+import java.util.concurrent.CountDownLatch;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import static org.springframework.data.mongodb.core.query.Criteria.where;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/23
+ * desc 文件级溯源分析任务
+ */
+@Log4j2
+public class FileAnalysisTask extends IAnalysisTask {
+
+ private MongoTemplate mongoTemplate;
+ private AnalysisTask analysisTask;
+ private SolrUtils solrUtils;
+ //the file under analysis
+ private FileDataMongoDto analysisFile;
+ private RedisUtil redisUtil;
+ private CountDownLatch countDownLatch;
+
+
+ /**
+ * Build a file-level analysis task.
+ *
+ * @param analysisTask owning analysis task (carries config such as the open-rate threshold)
+ * @param analysisFile the file to analyse
+ * @param mongoTemplate mongo access for results and logs
+ * @param countDownLatch released when this task finishes, success or failure
+ */
+ public FileAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
+ this.mongoTemplate = mongoTemplate;
+ this.analysisTask = analysisTask;
+ this.analysisFile = analysisFile;
+ this.countDownLatch = countDownLatch;
+ //solr and redis helpers are resolved from the Spring context because
+ //this task is constructed manually, not as a Spring bean
+ this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
+ this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
+ }
+
+
+ /**
+ * File-level provenance analysis.
+ * Must run after the project-level analysis has completed: the file's
+ * source MD5 already failed to match in solr, so features are extracted
+ * here and matched instead.
+ */
+ @Override
+ public void run() {
+ //Check the task state first; bail out if it was stopped or paused.
+ Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
+ if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
+ log.info("任务已取消,fileName:{}", analysisFile.getName());
+ countDownLatch.countDown();
+ return;
+ }
+ //Name of the file being analysed
+ String fileName = analysisFile.getName();
+
+ AnalysisLogUtil.insert(mongoTemplate, "【文件级分析】正在分析" + fileName);
+ try {
+ //Only mainstream languages can be parsed.
+ //Files outside the 32 supported languages cannot have features
+ //extracted; they were already covered by the file-level MD5 match.
+ if (StringUtils.isNotEmpty(analysisFile.getSuffix()) && FixedValue.SUFFIX_SOLR_VERSION.containsKey(analysisFile.getSuffix())) {
+ //Look up the *_CutFileInfo core name by file suffix
+ String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
+ //Obtain a parser for this file name
+ Analysis analysis = AnalysisFactory.getAnalysis(fileName);
+ //A null parser means feature extraction for this language is not yet
+ //supported; such files can only be matched by their MD5 in solr.
+ if (analysis != null) {
+ //Skip file/line-level feature extraction for files over 3 MB
+ Integer fileSize = analysisFile.getFileSize();
+ if (fileSize < (3 * 1024 * 1024)) {
+ CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0");
+ //Query the suffix-specific feature core with the file's feature values
+ if (codeFile != null) {
+ String querySb = "sourceMd5:" + codeFile.getSourceMd5() + " OR cutFileMd5:" + codeFile.getCutFileMd5() + " OR traitFileMd5:" + codeFile.getTraitFileMd5();
+ SolrDocumentList openSourceFileList = solrUtils.query(featureCoreName, querySb, "sourceMd5");
+ //If the file matched anything in the source library, compute its open-source rate
+ if (CollectionUtils.isNotEmpty(openSourceFileList)) {
+ ananlyzeFileOpenRate(openSourceFileList);
+ }
+ }
+ }
+ }
+ }
+ //Persist the file-level analysis result (status -> done)
+ analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .replaceWith(analysisFile)
+ .findAndReplace();
+ } catch (Exception e) {
+ AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【文件级】提取失败" + fileName, e);
+ log.error("文件:" + fileName + "文件级别特征提取失败!", e);
+ //Mark this file's analysis as failed
+ analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode());
+ //Persist the failure status
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
+ .first();
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+
+
+ /**
+ * Compute the open-source rate of the analysed file against the matched
+ * open-source files and persist the aggregate match record to MongoDB.
+ *
+ * @param fileList open-source file documents matched by the feature query
+ * @throws IOException if the analysed file cannot be read from disk
+ */
+ private void ananlyzeFileOpenRate(SolrDocumentList fileList) throws IOException {
+ //Aggregate record describing this file's open-source matches
+ MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto();
+ matchOpenFileInfo.setId(IdGenerator.uuid32())
+ .setFileName(analysisFile.getName())
+ .setFilePath(analysisFile.getFileUrl());
+
+ //Resolve version IDs for the matched open-source files by their md5
+ Set sourceFileMd5 = fileList.stream().map(solrDocument -> (String) solrDocument.get("sourceMd5")).collect(Collectors.toSet());
+ String sourceCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
+ Map md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceCoreName, sourceFileMd5);
+
+ //Batch-load version details for all resolved version IDs
+ Set versionIds = md5VersionObjMap.values().stream().map(solrDocument -> (String) solrDocument.get("versionId")).collect(Collectors.toSet());
+ List treeInfoList = solrUtils.queryBatchVersionInfoByVersionIds(versionIds);
+ Map versionIdMap = treeInfoList.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity()));
+
+ //Read the analysed file content (spaces stripped, matching the indexed form)
+ String fileContent = new String(Files.readAllBytes(Paths.get(analysisFile.getFileUrl())), "utf-8").replaceAll(" ", "");
+
+ //Split the content into lines once; reused for every similarity comparison
+ List fileLines = SimilarityUtil.getSplitWords(fileContent);
+
+ //Row numbers of the analysed file that matched any open-source file
+ HashSet openLineNum = new HashSet<>();
+
+ //Per open-source-file match results
+ ArrayList matchOpenFileList = new ArrayList<>();
+ //Walk every matched open-source file
+ for (int i = 0; i < fileList.size(); i++) {
+ String openFileMd5 = (String) fileList.get(i).get("sourceMd5");
+ SolrDocument versionObj = md5VersionObjMap.get(openFileMd5);
+ String versionId = (String) versionObj.get("versionId");
+ VersionTree versionInfo = versionIdMap.get(versionId);
+ if (versionInfo == null) {
+ log.error("未在versionTree中找到版本信息,openFileMd5:{},versionId:{}",openFileMd5, versionId);
+ continue;
+ }
+ MatchOpenFile matchOpenFile = new MatchOpenFile();
+ matchOpenFile.setId(IdGenerator.uuid32())
+ .setVersionId(versionId)
+ .setSourceFilePath((String) versionObj.get("fullPath"))
+ .setSourceUrl(versionInfo.getDownUrl())
+ .setPId(versionInfo.getProId())
+ .setPName(versionInfo.getProName())
+ .setLicenseType(versionInfo.getLicenseType())
+ .setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode())
+ .setVersion(versionInfo.getVersionName())
+ //md5-level match implies identical features
+ .setFeatureSimilarity(100.00f);
+ //Text similarity between the analysed file and the open-source file.
+ //The source-content core index is selected by the md5's first character.
+ String solrNameIndex = openFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
+ SolrDocumentList sourceFileInfo = solrUtils.query(solrNameIndex, "sourceFileMd5:" + openFileMd5, "sourceContent");
+ if (CollectionUtils.isNotEmpty(sourceFileInfo)) {
+ String openSourceContent = String.valueOf(sourceFileInfo.get(0).getFieldValue("sourceContent"));
+ double similarity = SimilarityUtil.getSimilarityAndSaveRowNum(fileLines, openSourceContent, openLineNum);
+ matchOpenFile.setOpenRate(new BigDecimal(similarity * 100).setScale(2, RoundingMode.HALF_UP).floatValue());
+ } else {
+ //Source text unavailable: fall back to treating the file as fully open
+ log.error("找不到源代码,DBname:{},sourceFileMd5:{}", solrNameIndex, openFileMd5);
+ matchOpenFile.setOpenRate(100.00f);
+ }
+ matchOpenFile.setMd5(openFileMd5);
+ matchOpenFileList.add(matchOpenFile);
+ }
+ //Overall open-source rate of the analysed file:
+ //matched line count / total line count
+ Integer openRateThreshold = analysisTask.getOpenRateThreshold();
+ int openLineCount = openLineNum.size();
+ BigDecimal totalLineCount = new BigDecimal(fileLines.size());
+ BigDecimal openRate = new BigDecimal(openLineCount).divide(totalLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
+
+ //The file counts as open source once its open rate exceeds the threshold
+ if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) {
+ analysisFile.setOpenType(true);
+ } else {
+ analysisFile.setOpenType(false);
+ }
+
+ //Record line count and rate on the analysed file
+ analysisFile.setOpenLineCount(openLineCount)
+ .setOpenRate(openRate.floatValue());
+
+ //Assemble the aggregate match record.
+ //BUGFIX: previously the persisted rate was flattened to 100.00/0.00 based
+ //on openType; persist the actually computed open rate instead, consistent
+ //with the other analysis levels.
+ matchOpenFileInfo.setFilePath(analysisFile.getFileUrl())
+ .setOpenType(analysisFile.getOpenType())
+ .setOpenRate(openRate.floatValue())
+ .setMatchOpenFile(matchOpenFileList);
+
+ //Persist the match record
+ mongoTemplate.insert(matchOpenFileInfo);
+
+ }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
new file mode 100644
index 0000000..100c1d8
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
@@ -0,0 +1,409 @@
+package com.keyware.composeanalysis.task;
+
+
+import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.lang.Pair;
+import cn.hutool.core.util.ObjUtil;
+import com.alibaba.fastjson.JSONArray;
+import com.keyware.composeanalysis.constant.FixedValue;
+import com.keyware.composeanalysis.constant.RedisConst;
+import com.keyware.composeanalysis.constant.SolrDBConst;
+import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
+import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
+import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+import com.keyware.composeanalysis.mongo.LineDataMongoDto;
+import com.keyware.composeanalysis.mongo.MatchOpenFile;
+import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
+import com.keyware.composeanalysis.solr.VersionTree;
+import com.keyware.composeanalysis.util.*;
+import com.keyware.keyswan.common.LineModel;
+import com.keyware.keyware.anaysis.Analysis;
+import com.keyware.keyware.anaysis.AnalysisFactory;
+import com.keyware.keyware.common.CodeFile;
+import com.keyware.keyware.common.Function;
+import com.keyware.utils.IdGenerator;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.query.Update;
+
+import java.io.FileInputStream;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.*;
+import java.util.concurrent.CountDownLatch;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.springframework.data.mongodb.core.query.Criteria.where;
+
+/**
+ * @author liuzongren
+ * @ClassName LineAnalysisTask
+ * @description: 函数级别溯源 任务
+ * @datetime 2024年 07月 25日 16:19
+ * @version: 1.0
+ */
+
+@Log4j2
+public class FunctionAnalysisTask extends IAnalysisTask {
+
+ private MongoTemplate mongoTemplate;
+ private AnalysisTask analysisTask;
+ //the file under analysis
+ private FileDataMongoDto analysisFile;
+
+ private SolrUtils solrUtils;
+
+ private RedisUtil redisUtil;
+
+ private CountDownLatch countDownLatch;
+
+
+ /**
+ * Build a function-level analysis task.
+ *
+ * @param analysisTask owning analysis task (carries config such as the open-rate threshold)
+ * @param analysisFile the file to analyse
+ * @param mongoTemplate mongo access for results and logs
+ * @param countDownLatch released when this task finishes, success or failure
+ */
+ public FunctionAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
+ this.mongoTemplate = mongoTemplate;
+ this.analysisTask = analysisTask;
+ this.analysisFile = analysisFile;
+ this.countDownLatch = countDownLatch;
+ //helpers resolved from the Spring context because this task is
+ //constructed manually, not as a Spring bean
+ this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
+ this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
+ }
+
+ /**
+ * Function (code block) level source provenance analysis.
+ * Must run after the file-level analysis has completed.
+ */
+
+ @Override
+ public void run() {
+ //Check the task state first; bail out if it was stopped or paused.
+ Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
+ if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
+ log.info("任务已取消,fileName:{}", analysisFile.getName());
+ countDownLatch.countDown();
+ return;
+ }
+ //Path of the file being analysed
+ String filePath = analysisFile.getFileUrl();
+ //Name of the file being analysed
+ String fileName = analysisFile.getName();
+
+ try {
+
+ //Feature core name, selected by file suffix
+ String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
+
+ //sourceFileBase core name (file version lookup), selected by file suffix
+ String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
+
+ //Function parser for this file type
+ Analysis analysis = AnalysisFactory.getAnalysis(filePath);
+ //Parse the file
+ if (!ObjUtil.hasEmpty(featureCoreName, sourceFileBaseCoreName, analysis)) {
+ CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath));
+ if (codeFile != null) {
+ List functionList = codeFile.getFunctionList();
+ if (CollectionUtil.isNotEmpty(functionList)) {
+ //Collect the functions' feature MD5s and cut MD5s
+ List featureFunctionMd5List = functionList.stream().map(Function::getMd5).collect(Collectors.toList());
+ List cutFunctionMd5List = functionList.stream().map(Function::getSourceMd5).collect(Collectors.toList());
+ Set queryMd5List = Stream.concat(featureFunctionMd5List.stream(), cutFunctionMd5List.stream()).collect(Collectors.toSet());
+ String queryStr = "fun_hay:(" + StringUtils.join(queryMd5List, " OR ") + ")";
+ SolrDocumentList matchOpenFiles = solrUtils.query(featureCoreName, queryStr, "sourceMd5,fun_hay");
+ //If the function features matched open-source files, run the
+ //similarity comparison against them.
+ //BUGFIX: solr queries return an empty list rather than null, so the
+ //old null check made the md5 fallback below unreachable; test for
+ //emptiness instead.
+ if (CollectionUtil.isNotEmpty(matchOpenFiles)) {
+ //Analyse the matched files
+ doAnalysis(matchOpenFiles, sourceFileBaseCoreName, codeFile);
+ } else {
+ //The function feature corpus is sparse; as a fallback, when no
+ //function features matched, query solr again by the file's md5.
+ checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5());
+ }
+ }
+ }
+ } else {
+ //Parser or core names unavailable for this file type; fall back to a
+ //direct lookup by the file's md5.
+ checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5());
+ }
+
+ //Mark the file's analysis status as done (function-level complete)
+ analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .replaceWith(analysisFile)
+ .findAndReplace();
+
+ AnalysisLogUtil.insert(mongoTemplate, "【函数级分析】完成" + fileName);
+ log.info("文件" + fileName + ":函数级分析完成");
+ } catch (Exception e) {
+ AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e);
+ log.error("文件:" + fileName + "函数级别特征提取失败!", e);
+ //Mark this file's analysis as failed
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
+ .first();
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+
+
+ /**
+ * Compare function-level text similarity against matched open-source files
+ * and persist the per-file result.
+ *
+ * @param matchOpenFiles open-source files matched via function features
+ * @param sourceFileBaseCoreName solr core used to resolve open-source file version IDs
+ * @param fileAnalysisRes function parse result of the analysed file
+ * @throws Exception
+ */
+ private void doAnalysis(SolrDocumentList matchOpenFiles, String sourceFileBaseCoreName, CodeFile fileAnalysisRes) throws Exception {
+
+ //Group functions by feature md5: getters/setters etc. share feature values
+ Map> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));
+
+ //Total line count across all functions
+ //NOTE(review): if every function reports 0 rows this is 0 and the
+ //featureSimilarity division below would throw — presumably parse results
+ //always carry positive row counts; confirm.
+ int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum();
+
+ //Feature-function md5s matched by any open-source file
+ Set matchFeatureFunctionMd5s = new HashSet();
+
+ //Source row numbers matched by any open-source file
+ Set matchOpenLineRowsNum = new HashSet();
+
+ //Per open-source file: compute open rate and feature similarity
+ List matchOpenFilesRes = calculateSimilarityAndOpenRate(matchOpenFiles, fileAnalysisRes, sourceFileBaseCoreName, matchOpenLineRowsNum, matchFeatureFunctionMd5s);
+
+ //Overall feature similarity: matched function lines / total function lines
+ int matchFunctionLineCount = 0;
+ for (String matchFeatureFunctionMd5 : matchFeatureFunctionMd5s) {
+ matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum();
+ }
+
+ BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ //Overall open rate: matched rows / file row count
+ BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ //Open-rate threshold from the task configuration
+ Integer openRateThreshold = analysisTask.getOpenRateThreshold();
+
+ //Above the threshold the file is flagged as open source
+ if (openRate.floatValue() > openRateThreshold) {
+ analysisFile.setOpenType(true);
+ }
+
+ //Persist this file's open-source match record to MongoDB
+ MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
+ matchOpenFileMongo.setId(IdGenerator.uuid32())
+ .setFilePath(analysisFile.getFileUrl())
+ .setFileName(analysisFile.getName())
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRate.floatValue())
+ .setOpenType(analysisFile.getOpenType())
+ .setMatchOpenFile(matchOpenFilesRes);
+
+ mongoTemplate.save(matchOpenFileMongo);
+ }
+
+
+ /**
+ * Compute the feature similarity and open-source rate of the analysed file
+ * against each matched open-source file.
+ *
+ * @param matchOpenFiles all open-source files matched via feature MD5s
+ * @param sourceFileBaseCoreName solr core name for version-ID lookup
+ * @param matchLineRowsNum out-param: row numbers matched across all open-source files
+ * @param matchFeatureFunctionMd5s out-param: feature-function MD5s matched across all open-source files
+ * return per-open-source-file match results
+ */
+ private List calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set matchLineRowsNum, Set matchFeatureFunctionMd5s) {
+
+ //Per open-source-file results
+ List matchOpenFilesRes = new ArrayList<>();
+
+ //Group functions by feature md5: getters/setters etc. share feature values
+ Map> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));
+
+ //First resolve version IDs and paths of the open-source files by md5
+ Set openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet());
+ Map md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s);
+
+ //Then load version details by version ID
+ //todo resolving one version hits two solr cores plus versionTree; optimize later
+ Set openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
+ List versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
+ Map versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));
+
+
+ //Total line count across all functions of the analysed file
+ BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());
+
+ for (SolrDocument openSourceFile : matchOpenFiles) {
+
+ //md5 of the open-source file
+ String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString();
+
+ //Function feature values of the open-source file
+ List openFileFunctionList = getOpenFileFunctionList(openSourceFile);
+
+ //Source-content core index, selected by the md5's first character
+ String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
+
+ //Full text of the open-source file
+ //NOTE(review): queryOne may return null (checkByOriginalFileMd5 guards
+ //for that case); a missing source document would NPE below — confirm.
+ SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent");
+
+ //Matched feature-function line count for this open-source file
+ int currentFileMatchFeatureLineCount = 0;
+
+ //Feature-function MD5s matched by this open-source file
+ Set currentFileMatchFeatureFunctionMd5 = new HashSet();
+
+ //Walk the analysed file's feature MD5s
+ for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) {
+ List currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5);
+ //Feature functions of the open-source file
+ for (Function openFunction : openFileFunctionList) {
+ if (funFeatureMd5.equals(openFunction.getMd5())) {
+ //Each feature function may only be counted once, or the overall
+ //feature similarity would be inflated; duplicate feature lines
+ //are added together on first match.
+ if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) {
+ currentFileMatchFeatureFunctionMd5.add(funFeatureMd5);
+ matchFeatureFunctionMd5s.add(funFeatureMd5);
+ currentFileMatchFeatureLineCount += currentFueatureFunctionList.stream().mapToInt(Function::getCodeRowNum).sum();
+ }
+ }
+ }
+ }
+
+ //Open rate against this open-source file
+ Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString());
+ //Record the matched row numbers for the overall open-rate statistics
+ matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
+
+ //Feature similarity against this open-source file
+ BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
+
+ SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
+ VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
+ //Assemble this open-source file's project information
+ MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
+ matchOpenFileInfo.setPId(versionInfo.getProId())
+ .setPName(versionInfo.getProName())
+ .setSourceUrl((String) openEntries.get("fullPath"))
+ .setFeatureSimilarity(featureSimilarity.floatValue())
+ .setOpenRate(openRateAndSaveRowNum.getKey())
+ .setVersion(versionInfo.getVersionName())
+ .setLicenseType(versionInfo.getLicenseType())
+ .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
+ matchOpenFilesRes.add(matchOpenFileInfo);
+ }
+ return matchOpenFilesRes;
+ }
+
+
+ /**
+ * The function feature corpus may be incomplete; as a second pass, look the
+ * file up by its original MD5 in the open-source corpus.
+ *
+ * @param originalFileMd5 md5 of the analysed file
+ * @param versionIdCoreName solr core used to resolve the version ID
+ */
+ private void checkByOriginalFileMd5(String versionIdCoreName, String originalFileMd5) {
+
+ //Query by md5 to see whether this file exists in the open-source corpus
+ SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + originalFileMd5, "versionId,fullPath,sourceFileMd5");
+
+ if (versionIdAndPath != null) {
+ //Load version details by version ID
+ VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId"));
+ if (versionInfo != null) {
+ //Exact md5 hit: similarity and open rate are both 100%
+ MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
+ matchOpenFileInfo.setPId(versionInfo.getProId())
+ .setPName(versionInfo.getProName())
+ .setSourceUrl(versionInfo.getDownUrl())
+ .setFeatureSimilarity(100.00f)
+ .setOpenRate(100.00f)
+ .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
+
+ //Persist this file's open-source match record to MongoDB
+ MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
+ matchOpenFileMongo.setId(IdGenerator.uuid32())
+ .setFilePath(analysisFile.getFileUrl())
+ .setFileName(analysisFile.getName())
+ .setOpenRate(100.00f)
+ .setOpenType(analysisFile.getOpenType())
+ .setMatchOpenFile(Arrays.asList(matchOpenFileInfo));
+
+ mongoTemplate.save(matchOpenFileMongo);
+ }
+ }
+ }
+
+
+ /**
+ * Parse the function feature values carried by a matched open-source file
+ * document ("fun_hay" field) into a list of Function entries.
+ *
+ * @param matchOpenFile solr document holding the escaped feature JSON
+ * @return parsed function list, or an empty list when parsing fails
+ */
+ private List getOpenFileFunctionList(SolrDocument matchOpenFile) {
+ try {
+ // The stored JSON is escaped; strip backslashes and un-quote the
+ // embedded objects before handing the text to the JSON parser.
+ String rawFeatureJson = matchOpenFile.getFieldValue("fun_hay").toString();
+ String normalizedJson = rawFeatureJson
+ .replace("\\", "")
+ .replace("\"{", "{")
+ .replace("}\"", "}");
+ return JSONArray.parseArray(normalizedJson, Function.class);
+ }catch (Exception e){
+ // Malformed feature data: log and fall through to an empty result.
+ log.error("解析文件特征值失败",e);
+ }
+ return new ArrayList();
+ }
+
+ /**
+ * Batch-insert extracted feature values into MongoDB.
+ *
+ * @param features extracted line features; may be empty
+ * @param lineDataMongoDto carrier document for the current analysis task
+ * @param
+ */
+ @Deprecated
+ private void insertFeatureValue(List features, LineDataMongoDto lineDataMongoDto) {
+ List batchInsertList = new ArrayList<>();
+ if (CollectionUtil.isNotEmpty(features)) {
+ //The batching logic below could use a review pass.
+ //Flush every 10 entries so a huge parse result cannot exceed MongoDB's
+ //document size limit.
+ int batchInsertStpe = 10;
+ int total = 0;
+ for (int i = 0; i < features.size(); i++) {
+ LineModel lineModel = features.get(i);
+ if (total != batchInsertStpe) {
+ batchInsertList.add(lineModel);
+ total++;
+ }
+ //Last element with a partially filled batch: flush the remainder.
+ if (i == features.size() - 1 && total != batchInsertStpe) {
+ total = 0;
+ lineDataMongoDto.setId(IdGenerator.uuid32())
+ .setLineModels(batchInsertList);
+ mongoTemplate.insert(lineDataMongoDto);
+ }
+ //Batch full: flush it and start collecting a new one.
+ if (total == batchInsertStpe) {
+ total = 0;
+ lineDataMongoDto.setId(IdGenerator.uuid32())
+ .setLineModels(batchInsertList);
+ mongoTemplate.insert(lineDataMongoDto);
+ batchInsertList.clear();
+ }
+ }
+ } else {
+ //No features: still persist an (empty) record for the task.
+ lineDataMongoDto.setId(IdGenerator.uuid32());
+ mongoTemplate.insert(lineDataMongoDto);
+ }
+ }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/IAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/IAnalysisTask.java
new file mode 100644
index 0000000..2c9b2f9
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/IAnalysisTask.java
@@ -0,0 +1,10 @@
+package com.keyware.composeanalysis.task;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/31
+ * @description Abstract base type for all provenance analysis tasks.
+ * Implements Runnable so concrete tasks can be submitted to an executor.
+ */
+public abstract class IAnalysisTask implements Runnable{
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
new file mode 100644
index 0000000..ac1bb75
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
@@ -0,0 +1,298 @@
+package com.keyware.composeanalysis.task;
+
+
+import cn.hutool.core.collection.CollectionUtil;
+import com.keyware.composeanalysis.constant.FixedValue;
+import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst;
+import com.keyware.composeanalysis.constant.RedisConst;
+import com.keyware.composeanalysis.constant.SolrDBConst;
+import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
+import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
+import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mongo.FileDataMongoDto;
+import com.keyware.composeanalysis.mongo.LineDataMongoDto;
+import com.keyware.composeanalysis.mongo.MatchOpenFile;
+import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
+import com.keyware.composeanalysis.solr.VersionTree;
+import com.keyware.composeanalysis.util.AnalysisLogUtil;
+import com.keyware.composeanalysis.util.RedisUtil;
+import com.keyware.composeanalysis.util.SolrUtils;
+import com.keyware.composeanalysis.util.SpringContextUtils;
+import com.keyware.keyswan.anaysis.Analysis;
+import com.keyware.keyswan.anaysis.AnalysisFactory;
+import com.keyware.keyswan.common.CodeFile;
+import com.keyware.utils.IdGenerator;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.query.Update;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.*;
+import java.util.concurrent.CountDownLatch;
+
+import static org.springframework.data.mongodb.core.query.Criteria.where;
+
+/**
+ * @author liuzongren
+ * @ClassName LineAnalysisTask
+ * @description: 行级别 特征提取定时任务
+ * @datetime 2024年 07月 25日 16:19
+ * @version: 1.0
+ */
+
+@Log4j2
+public class LineAnalysisTask extends IAnalysisTask {
+
+ private MongoTemplate mongoTemplate;
+ private AnalysisTask analysisTask;
+ //the file under analysis
+ private FileDataMongoDto analysisFile;
+
+ private SolrUtils solrUtils;
+
+ private RedisUtil redisUtil;
+
+ private CountDownLatch countDownLatch;
+
+ /**
+ * Build a line-level analysis task.
+ *
+ * @param analysisTask owning analysis task (carries config such as the open-rate threshold)
+ * @param analysisFile the file to analyse
+ * @param mongoTemplate mongo access for results and logs
+ * @param countDownLatch released when this task finishes, success or failure
+ */
+ public LineAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
+ this.mongoTemplate = mongoTemplate;
+ this.analysisTask = analysisTask;
+ this.analysisFile = analysisFile;
+ this.countDownLatch = countDownLatch;
+ //helpers resolved from the Spring context because this task is
+ //constructed manually, not as a Spring bean
+ this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
+ this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
+ }
+
+ /**
+ * Line-level source provenance analysis.
+ * Must run after the file-level analysis has completed.
+ */
+
+ @Override
+ public void run() {
+ //Check the task state first; bail out if it was stopped or paused.
+ Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
+ if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
+ log.info("任务已取消,fileName:{}", analysisFile.getName());
+ countDownLatch.countDown();
+ return;
+ }
+
+ //Path of the file being analysed
+ String filePath = analysisFile.getFileUrl();
+ //Name of the file being analysed
+ String fileName = analysisFile.getName();
+
+ AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】正在提取" + fileName);
+ try {
+ LineDataMongoDto lineDataMongoDto = new LineDataMongoDto();
+ lineDataMongoDto.setFileId(analysisFile.getId())
+ .setStatus(0)
+ .setIsSelect(false);
+ Analysis analysis = AnalysisFactory.getAnalysis(filePath);
+ CodeFile codeFile = null;
+
+ //Extract the file's line-level feature md5s
+ codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT);
+ //md5s of each raw line (currently unused)
+// String cutFileLineMd5 = codeFile.getCutFileLineMd5();
+ //md5s of each line's feature content, comma separated
+ String traitFileLineMd5 = codeFile.getTraitFileLineMd5();
+
+ String[] featureMd5Arr = {};
+ if (StringUtils.isNotBlank(traitFileLineMd5)) {
+ featureMd5Arr = traitFileLineMd5.split(",");
+ }
+ List lineFeatures = Arrays.asList(featureMd5Arr);
+
+ //Fetch open-source files with similar line features from solr
+ SolrDocumentList featureSimilarityFromSolr = getFeatureSimilarityFromSolr(lineFeatures);
+
+ //Compute the file's open-source rate
+ calculateOpenRate(featureSimilarityFromSolr, lineFeatures);
+
+ //Mark the file's analysis status as done (line-level complete)
+ analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .replaceWith(analysisFile)
+ .findAndReplace();
+
+ AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】提取完成" + fileName);
+ log.info("文件" + fileName + ":行级分析完成");
+ } catch (Exception e) {
+ AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【行级特征提取】提取失败" + fileName, e);
+ log.error("文件:" + fileName + "行级别特征提取失败!", e);
+ //Mark this file's analysis as failed
+ mongoTemplate.update(FileDataMongoDto.class)
+ .matching(where("_id").is(analysisFile.getId()))
+ .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
+ .first();
+ } finally {
+ countDownLatch.countDown();
+ }
+ }
+
+
+ /**
+ * Compute the analysed file's open-source rate from line-feature matches.
+ *
+ * @param matcheOpenSourceFiles open-source files matched by line features
+ * @param lineFeatures line-feature md5s of the analysed file
+ */
+ private void calculateOpenRate(SolrDocumentList matcheOpenSourceFiles, List lineFeatures) {
+
+ if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) {
+ return;
+ }
+
+ //Version-lookup core name, selected by file suffix
+ String versionIdCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
+
+
+ //Aggregate record describing this file's open-source matches
+ //NOTE(review): unlike the file/function level tasks, no overall openRate
+ //is ever set on this dto before saving — looks like an omission; confirm.
+ MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
+ matchOpenFileMongo.setId(IdGenerator.uuid32())
+ .setFilePath(analysisFile.getFileUrl())
+ .setFileName(analysisFile.getName());
+
+ //Per open-source-file results
+ List matchOpenFileInfoList = new ArrayList<>();
+
+ //All matched line features, for the overall matched-line statistics
+ Set matchingLineSet = new HashSet<>();
+
+ //Total line count of the analysed file
+ BigDecimal totalCodeRowNum = new BigDecimal(analysisFile.getCodeRowNum());
+
+ //Count matched lines between the analysed file and each open-source file
+ for (SolrDocument matchFile : matcheOpenSourceFiles) {
+ //Line-feature md5s of the open-source file, comma separated
+ String lineFeatureMd5s = (String) matchFile.get("tz_line_hay");
+ List matchedLineFeatures = Arrays.asList(lineFeatureMd5s.split(","));
+
+ //Matched line count against this open-source file
+ int currentFileMatchLineCount = 0;
+
+ //Compare every analysed-file feature against this open-source file's features
+ for (String originalLineFeatureMd5 : lineFeatures) {
+ for (String matchLineFeatureMd5 : matchedLineFeatures) {
+ if (originalLineFeatureMd5.equals(matchLineFeatureMd5)) {
+ currentFileMatchLineCount++;
+ matchingLineSet.add(originalLineFeatureMd5);
+ }
+ }
+ }
+
+ //Resolve the open-source file's version ID and path by its md5
+ //NOTE(review): queryOne may return null (guarded elsewhere); a miss here
+ //would NPE on the next line — confirm.
+ SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + matchFile.get("sourceMd5"), "versionId,fullPath,sourceFileMd5");
+
+ //Load version details by version ID
+ //todo resolving one version hits two solr cores plus versionTree; optimize later
+ VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId"));
+
+ //Open rate against this open-source file
+ BigDecimal openRate = new BigDecimal(currentFileMatchLineCount).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
+
+ //Assemble this open-source file's project information
+ MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
+ matchOpenFileInfo.setPId(versionInfo.getProId())
+ .setPName(versionInfo.getProName())
+ .setSourceUrl(versionInfo.getDownUrl())
+ .setOpenRate(openRate.floatValue())
+ .setVersion(versionInfo.getVersionName())
+ .setLicenseType(versionInfo.getLicenseType())
+ .setAnalyzeType(AnalysisLevelEnum.LINE_LEVEL.getCode());
+ matchOpenFileInfoList.add(matchOpenFileInfo);
+ }
+
+ //Overall open rate of the analysed file
+ BigDecimal openRate = new BigDecimal(matchingLineSet.size()).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
+
+ //Open-rate threshold from the task configuration
+ Integer openRateThreshold = analysisTask.getOpenRateThreshold();
+
+
+ //At or above the threshold the file is flagged as open source
+ //NOTE(review): this uses >= while FileAnalysisTask uses a strict > against
+ //the same threshold — confirm which comparison is intended.
+ if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
+ analysisFile.setOpenType(true);
+ }
+
+ //Persist the aggregate match record
+ matchOpenFileMongo.setOpenType(analysisFile.getOpenType())
+ .setMatchOpenFile(matchOpenFileInfoList);
+ mongoTemplate.save(matchOpenFileMongo);
+
+ }
+
+
+ /**
+ * Batch-insert extracted feature values into MongoDB.
+ *
+ * @param features comma-separated feature md5 string; may be blank
+ * @param lineDataMongoDto carrier document for the current analysis task
+ * todo revisit later whether this insert is still needed
+ * @param
+ */
+ @Deprecated
+ private void insertFeatureValue(String features, LineDataMongoDto lineDataMongoDto) {
+ String[] featureMd5Arr = {};
+ if (StringUtils.isNotBlank(features)) {
+ featureMd5Arr = features.split(",");
+ }
+ List lineFeatures = Arrays.asList(featureMd5Arr);
+ List batchInsertList = new ArrayList<>();
+ if (CollectionUtil.isNotEmpty(lineFeatures)) {
+ //The batching logic below could use a review pass.
+ //Flush every 5000 entries so a huge parse result cannot exceed
+ //MongoDB's document size limit.
+ int batchInsertStpe = 5000;
+ int total = 0;
+ for (int i = 0; i < lineFeatures.size(); i++) {
+ if (total != batchInsertStpe) {
+ batchInsertList.add(lineFeatures.get(i));
+ total++;
+ }
+ //Last element with a partially filled batch: flush the remainder.
+ if (i == lineFeatures.size() - 1 && total != batchInsertStpe) {
+ total = 0;
+ lineDataMongoDto.setId(IdGenerator.uuid32())
+ .setLineFeatueMd5s(batchInsertList);
+ mongoTemplate.insert(lineDataMongoDto);
+ }
+ //Batch full: flush it and start collecting a new one.
+ if (total == batchInsertStpe) {
+ total = 0;
+ lineDataMongoDto.setId(IdGenerator.uuid32())
+ .setLineFeatueMd5s(batchInsertList);
+ mongoTemplate.insert(lineDataMongoDto);
+ batchInsertList.clear();
+ }
+ }
+ } else {
+ //No features: still persist an (empty) record for the task.
+ lineDataMongoDto.setId(IdGenerator.uuid32());
+ mongoTemplate.insert(lineDataMongoDto);
+ }
+ }
+
+
+ /**
+ * Query the temporary source-file feature core for documents sharing any of
+ * the given line-feature md5s.
+ *
+ * @param lineFeatureList line-feature md5s of the analysed file
+ * @return matching documents carrying sourceMd5 and tz_line_hay fields
+ */
+ private SolrDocumentList getFeatureSimilarityFromSolr(List lineFeatureList) {
+ String coreName = SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP;
+ // Build a single OR query over every line-feature md5.
+ String featureQuery = "tz_line_hay:(" + StringUtils.join(lineFeatureList, " OR ") + ")";
+ log.info("查询条件: solrCoreName:{},queryStr:{}", coreName, featureQuery);
+ SolrDocumentList matchedDocs = solrUtils.query(coreName, featureQuery, "sourceMd5,tz_line_hay");
+ log.info("查询结果: result:{}", matchedDocs);
+ return matchedDocs;
+ }
+
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
new file mode 100644
index 0000000..4692050
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
@@ -0,0 +1,378 @@
+package com.keyware.composeanalysis.task;
+
+import cn.hutool.core.collection.CollectionUtil;
+import com.google.common.collect.Sets;
+import com.keyware.composeanalysis.constant.FixedValue;
+import com.keyware.composeanalysis.constant.MongoDBConst;
+import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
+import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
+import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
+import com.keyware.composeanalysis.entity.AnalysisTask;
+import com.keyware.composeanalysis.mongo.*;
+import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl;
+import com.keyware.composeanalysis.solr.VersionTree;
+import com.keyware.composeanalysis.solr.VersionTreeNode;
+import com.keyware.composeanalysis.util.AnalysisLogUtil;
+import com.keyware.composeanalysis.util.SolrUtils;
+import com.keyware.composeanalysis.util.SpringContextUtils;
+import com.keyware.utils.IdGenerator;
+import com.mongodb.client.MongoClient;
+import lombok.extern.log4j.Log4j2;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.solr.common.SolrDocument;
+import org.springframework.core.task.TaskExecutor;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.data.mongodb.core.query.Query;
+import org.springframework.data.mongodb.core.query.Update;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+
+import static org.springframework.data.mongodb.core.query.Criteria.where;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/26
+ * desc 项目级溯源分析任务,先将所有文件进行项目级匹配,匹配不中的文件在进行细致级别的匹配
+ * 项目级匹配前,需要完成文件解压工作
+ */
+@Log4j2
+public class PorjectAnalysisTask {
+
+    /** Task-private Mongo database (one database per analysis task). */
+    private final MongoTemplate mongoTemplate;
+    /** Shared knowledge-base database holding open-source project/version data. */
+    private final MongoTemplate keyswanDBTemplate;
+    /** The analysis task being processed. */
+    private final AnalysisTask analysisTask;
+    private final AnalysisTaskServiceImpl analysisService;
+    private final SolrUtils solrUtils;
+    /** Executor used to persist per-file match results asynchronously. */
+    private final TaskExecutor taskExecutor;
+
+    /**
+     * Project-level analysis task.
+     *
+     * @param mongoClient     Mongo connection used to open both databases
+     * @param analysisTask    the task under analysis
+     * @param solrUtils       Solr access helper
+     * @param analysisService service used to persist task status changes
+     */
+    public PorjectAnalysisTask(MongoClient mongoClient, AnalysisTask analysisTask, SolrUtils solrUtils, AnalysisTaskServiceImpl analysisService) {
+        this.analysisService = analysisService;
+        this.keyswanDBTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_KEYSWAN);
+        this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
+        this.analysisTask = analysisTask;
+        this.solrUtils = solrUtils;
+        this.taskExecutor = SpringContextUtils.getBean(TaskExecutor.class);
+    }
+
+
+    /**
+     * Run the project-level analysis:
+     * 1. try to match the whole project archive by its MD5 against the version base;
+     * 2. otherwise match all file MD5s against the Solr versionTree core;
+     * 3. files still unmatched are looked up individually in the source-file cores.
+     * Any failure flips the task status to FAIL_ANALYSIS and is logged to Mongo.
+     */
+    public void doAnalysis() {
+        try {
+            long startTime = System.currentTimeMillis();
+            boolean matchedProject = matchByProjectMd5();
+
+            if (!matchedProject) {
+                List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
+
+                if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
+                    matchByFileMd5s(unMatchedFiles);
+                }
+            }
+            //todo if overall latency grows, persist matchOpenFileInfo asynchronously
+            log.info("项目级分析完成,用时:" + (System.currentTimeMillis() - startTime) / 1000 + "s");
+        } catch (Exception e) {
+            // insertErrorInfo stores the full stack trace; concatenating e.getStackTrace()
+            // directly would only log the array's identity string.
+            AnalysisLogUtil.insertErrorInfo(mongoTemplate, "成分分析失败:", e);
+            log.error("项目级分析失败,项目名称:" + analysisTask.getFileName(), e);
+            analysisTask.setAnalysisStatus(AnalysisStatusEnum.FAIL_ANALYSIS.getCode());
+            analysisService.updateById(analysisTask);
+        }
+    }
+
+    /**
+     * Whole-project match: is the archive itself a known open-source release?
+     *
+     * @return true when the project MD5 matched the version base
+     */
+    private Boolean matchByProjectMd5() {
+        Query versionBaseQuery = new Query(where("MD5").is(analysisTask.getMd5()));
+        VersionbasedataMongoDto openSourceProject = keyswanDBTemplate.findOne(versionBaseQuery, VersionbasedataMongoDto.class);
+        if (openSourceProject == null) {
+            return false;
+        }
+        // The whole project is open source; record the assembly match with 100% similarity.
+        analysisTask.setOpenType(true);
+        ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto();
+        projectAssembly.setId(IdGenerator.uuid32())
+                .setFileCount(analysisTask.getFileCount())
+                .setMatchFileCount(analysisTask.getFileCount())
+                .setProjectId(openSourceProject.getProjectId())
+                .setVersionId(openSourceProject.getVersionId())
+                .setVersionName(openSourceProject.getVersionName())
+                .setSemblance(100.00d)
+                .setOpenSourceUrl(openSourceProject.getDownloadUrl());
+        // Enrich with the project name and license info from the project base.
+        Query projectBaseQuery = new Query(where("ID").is(openSourceProject.getProjectId()));
+        ProjectBaseDataMongoDto projectbasedata = keyswanDBTemplate.findOne(projectBaseQuery, ProjectBaseDataMongoDto.class);
+        if (projectbasedata != null) {
+            projectAssembly.setProjectName(projectbasedata.getName());
+            if (StringUtils.isNotEmpty(projectbasedata.getLicenseType())) {
+                projectAssembly.setLicenseType(Arrays.asList(projectbasedata.getLicenseType()));
+            }
+        }
+        mongoTemplate.insert(projectAssembly);
+
+        analysisService.updateById(analysisTask);
+
+        // Every regular file of the project is open source — mark them done in one update.
+        mongoTemplate.update(FileDataMongoDto.class)
+                .matching(where("isDirectory").is(false))
+                .apply(new Update().set("openType", true)
+                        .set("openRate", 100.00d)
+                        .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
+                .all();
+
+        // Persist the per-file open-source details taken from the matched version tree.
+        VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId());
+        Query fileQuery = new Query(where("isDirectory").is(false));
+        List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class);
+        saveProjectOpenInfo(openProjectList, fileDataMongoDtos);
+        return true;
+    }
+
+    /**
+     * Match all project files (by MD5) against open-source version trees.
+     *
+     * @return the files that did not match any open-source project
+     */
+    private List<FileDataMongoDto> matchByAllFilesMd5() {
+        Query query = new Query(where("isDirectory").is(false));
+        List<FileDataMongoDto> projectFiles = mongoTemplate.query(FileDataMongoDto.class).matching(query).all();
+
+        // Several rounds of whole-file-set matching against the Solr versionTree core.
+        Set<String> matchedFileMd5Set = multipleMatchByAllFilesMd5(projectFiles);
+
+        return projectFiles.stream()
+                .filter(file -> !matchedFileMd5Set.contains(file.getMd5()))
+                .collect(Collectors.toList());
+    }
+
+    /**
+     * Match the remaining files one by one against the per-language source-file cores.
+     * Files of the 32 mainstream languages go to their dedicated core; everything else
+     * (including files without a suffix) goes to the default core.
+     */
+    private void matchByFileMd5s(List<FileDataMongoDto> unMatchedFiles) {
+
+        // Group by suffix so each group can be matched against its language's core.
+        Map<String, List<FileDataMongoDto>> allSuffixFiles = unMatchedFiles.stream()
+                .filter(file -> StringUtils.isNotEmpty(file.getSuffix()))
+                .collect(Collectors.groupingBy(FileDataMongoDto::getSuffix));
+
+        // Files whose language has no dedicated feature core.
+        List<FileDataMongoDto> otherLanguageFiles = new ArrayList<>();
+
+        allSuffixFiles.forEach((suffix, data) -> {
+            if (FixedValue.SUFFIX_SOLR_VERSION.containsKey(suffix)) {
+                String currentCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(suffix);
+                // Resolve version ids from the language's *_SourceFileBase core by source MD5.
+                Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
+                Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s);
+                if (CollectionUtil.isEmpty(md5VersionObjMap)) {
+                    return;
+                }
+                saveMatchOpenFileInfo(md5VersionObjMap, data);
+            } else {
+                otherLanguageFiles.addAll(data);
+            }
+        });
+
+        // Suffix-less files are handled together with the non-mainstream languages.
+        List<FileDataMongoDto> noSuffixFiles = unMatchedFiles.stream().parallel()
+                .filter(file -> StringUtils.isEmpty(file.getSuffix()))
+                .collect(Collectors.toList());
+        otherLanguageFiles.addAll(noSuffixFiles);
+
+        if (CollectionUtils.isNotEmpty(otherLanguageFiles)) {
+            // Such files carry two MD5 variants; the character-stream MD5 is skipped here
+            // because it is almost always identical to the plain source MD5.
+            Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
+            Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
+            if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
+                // These files cannot be parsed further, so an MD5 miss here is final:
+                // mark them as analysed.
+                updateFileAnalysisStatus(fileMd5s);
+                return;
+            }
+            saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);
+            // Files that did not match are final misses as well.
+            updateFileAnalysisStatus(Sets.difference(fileMd5s, md5VersionIdMap.keySet()));
+        }
+
+    }
+
+
+    /**
+     * Enrich matched files with their open-source version details and store them.
+     *
+     * @param md5VersionIdMap file MD5 -> Solr doc containing "versionId" and "fullPath"
+     * @param originalFiles   the project files the MD5s belong to
+     */
+    private void saveMatchOpenFileInfo(Map<String, SolrDocument> md5VersionIdMap, List<FileDataMongoDto> originalFiles) {
+        List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
+        //todo if this lookup turns out to be slow, fetch the version details asynchronously
+        Set<String> versionIds = md5VersionIdMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
+        List<VersionTree> versionInfos = solrUtils.queryBatchVersionInfoByVersionIds(versionIds);
+        Map<String, VersionTree> versionTreeMap = versionInfos.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity(), (key1, key2) -> key1));
+        Map<String, FileDataMongoDto> fileMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
+        md5VersionIdMap.forEach((fileMd5, versionObj) -> {
+            String versionId = (String) versionObj.get("versionId");
+            VersionTree versionInfo = versionTreeMap.get(versionId);
+            if (versionInfo == null) {
+                log.error("根据versionId,未在versionTree中找到版本信息,fileMd5:{},versionId:{}", fileMd5, versionId);
+                return;
+            }
+            FileDataMongoDto fileDataMongoDto = fileMd5ObjMap.get(fileMd5);
+            MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString());
+            batchInsertCache.add(matchOpenFile);
+        });
+
+        if (CollectionUtils.isNotEmpty(batchInsertCache)) {
+            mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
+            updateFileAnalysisStatus(md5VersionIdMap.keySet());
+        }
+    }
+
+
+    /**
+     * After a whole-project (or version-tree) match, store the per-file open-source info.
+     *
+     * @param versionInfo   the matched open-source version tree
+     * @param originalFiles the project's files
+     */
+    private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles) {
+        Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
+        Set<String> matchedMd5s = new HashSet<>();
+        List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
+        List<VersionTreeNode> fileInfos = versionInfo.getDirTree();
+
+        fileInfos.forEach(versionTreeNodeObj -> {
+            String openFileMd5 = versionTreeNodeObj.getSourceFileMd5();
+            // Record each project file at most once, no matter how often its MD5
+            // appears in the open-source tree.
+            if (originalMd5ObjMap.containsKey(openFileMd5) && !matchedMd5s.contains(openFileMd5)) {
+                MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5), versionTreeNodeObj.getFullPath());
+                batchInsertCache.add(matchOpenFile);
+                matchedMd5s.add(openFileMd5);
+            }
+            // Flush in batches so a huge project neither exhausts memory nor
+            // exceeds MongoDB's bulk-insert limits.
+            if (batchInsertCache.size() >= 1000) {
+                mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
+                batchInsertCache.clear();
+            }
+        });
+
+        if (!batchInsertCache.isEmpty()) {
+            mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
+        }
+
+        updateFileAnalysisStatus(matchedMd5s);
+    }
+
+    /**
+     * Build the match record linking one project file to one open-source file.
+     * File-level MD5 matches are exact, hence 100% similarity / open rate.
+     */
+    private MatchOpenFileMongoDto getMatchOpenFile(VersionTree versionInfo, FileDataMongoDto originalFile, String openFilePath) {
+        MatchOpenFile matchOpenFile = new MatchOpenFile();
+        matchOpenFile.setId(IdGenerator.uuid32())
+                .setVersionId(versionInfo.getVersionId())
+                .setSourceFilePath(openFilePath)
+                .setSourceUrl(versionInfo.getDownUrl())
+                .setPId(versionInfo.getProId())
+                .setPName(versionInfo.getProName())
+                .setLicenseType(versionInfo.getLicenseType())
+                .setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode())
+                .setVersion(versionInfo.getVersionName())
+                .setFeatureSimilarity(100.00f)
+                .setOpenRate(100.00f);
+
+        MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto();
+        matchOpenFileInfo.setId(IdGenerator.uuid32())
+                .setFileName(originalFile.getName())
+                .setFilePath(originalFile.getFileUrl())
+                .setOpenType(originalFile.getOpenType())
+                .setFeatureSimilarity(100.00f)
+                .setOpenRate(100.00f)
+                .setMatchOpenFile(Arrays.asList(matchOpenFile));
+        return matchOpenFileInfo;
+    }
+
+    /**
+     * Match the whole file set against the versionTree core in several rounds; a single
+     * query returning many result sets would block Solr for a long time. Each round
+     * re-queries with only the MD5s still unmatched (currently at most 5 rounds).
+     * todo make the round count / similarity cut-off configurable
+     *
+     * @return the MD5s of all files matched in any round
+     */
+    private Set<String> multipleMatchByAllFilesMd5(List<FileDataMongoDto> projectFiles) {
+
+        Set<String> projectFilesMd5 = projectFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
+
+        Set<String> matchedFileMd5Set = new HashSet<>();
+
+        // MD5s still to be matched; shrinks after every round.
+        Set<String> unMatchedFileMd5s = new HashSet<>(projectFilesMd5);
+
+        for (int i = 0; i < 5; i++) {
+            String queryStr = "dirTree:(" + StringUtils.join(unMatchedFileMd5s, " OR ") + ")";
+            log.info("versionTree queryStr: " + queryStr);
+            long startTime = System.currentTimeMillis();
+            VersionTree openProject = solrUtils.queryVersionTree(queryStr);
+            log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000);
+            // No further open-source project matches — stop the whole-set matching.
+            if (openProject == null) {
+                break;
+            }
+
+            // Persist the matched per-file info asynchronously.
+            taskExecutor.execute(() -> saveProjectOpenInfo(openProject, projectFiles));
+
+            List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList());
+            // Files common to the project under test and the matched open-source project.
+            Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(openFilesMd5::contains).collect(Collectors.toSet());
+
+            // Needed later for the overall open-source rate.
+            matchedFileMd5Set.addAll(matchedFiles);
+
+            // Similarity of this round's match relative to the whole project.
+            BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
+
+            // Below 30% similarity the project-level record is not worth keeping.
+            if (semblance.compareTo(new BigDecimal(30)) < 0) {
+                break;
+            }
+
+            ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto();
+            projectAssembly.setId(IdGenerator.uuid32())
+                    .setFileCount(openFilesMd5.size())
+                    .setMatchFileCount(matchedFiles.size())
+                    .setProjectId(openProject.getProId())
+                    .setProjectName(openProject.getProName())
+                    .setVersionName(openProject.getVersionName())
+                    .setOpenSourceUrl(openProject.getDownUrl())
+                    .setSemblance(semblance.doubleValue());
+            mongoTemplate.insert(projectAssembly);
+
+            // Copy the difference into a real set: Sets.difference returns a live view,
+            // and stacking views round after round degrades lookups.
+            unMatchedFileMd5s = new HashSet<>(Sets.difference(unMatchedFileMd5s, matchedFiles));
+            if (CollectionUtils.isEmpty(unMatchedFileMd5s)) {
+                break;
+            }
+        }
+        return matchedFileMd5Set;
+    }
+
+    /**
+     * Mark the given files as fully open source and analysed.
+     */
+    private void updateFileAnalysisStatus(Set<String> fileMd5Set) {
+        if (CollectionUtils.isEmpty(fileMd5Set)) {
+            return;
+        }
+        mongoTemplate.update(FileDataMongoDto.class)
+                .matching(where("md5").in(fileMd5Set))
+                .apply(new Update().set("openType", true)
+                        .set("openRate", 100.00f)
+                        .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
+                .all();
+    }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java b/src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java
new file mode 100644
index 0000000..bff61e7
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java
@@ -0,0 +1,33 @@
+package com.keyware.composeanalysis.util;
+
+import cn.hutool.core.date.DateTime;
+import com.keyware.composeanalysis.mongo.AnalysisLogMongoDto;
+import org.springframework.data.mongodb.core.MongoTemplate;
+import org.springframework.scheduling.annotation.Async;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/30
+ * @description 分析日志工具
+ */
+public class AnalysisLogUtil {
+
+    /**
+     * Write a plain log entry, stamped with the current time, to MongoDB.
+     */
+    public static void insert(MongoTemplate mongoTemplate, String logInfo) {
+        AnalysisLogMongoDto entry = new AnalysisLogMongoDto()
+                .setLogInfo(logInfo)
+                .setCreateTime(new DateTime());
+        mongoTemplate.insert(entry);
+    }
+
+    /**
+     * Write a log entry consisting of the message followed by the exception's
+     * full stack trace.
+     */
+    public static void insertErrorInfo(MongoTemplate mongoTemplate, String logInfo, Exception e) {
+        AnalysisLogMongoDto entry = new AnalysisLogMongoDto()
+                .setLogInfo(logInfo + getErrorMsg(e))
+                .setCreateTime(new DateTime());
+        mongoTemplate.insert(entry);
+    }
+
+    /** Render the exception's stack trace into a String. */
+    private static String getErrorMsg(Exception e) {
+        StringWriter buffer = new StringWriter();
+        e.printStackTrace(new PrintWriter(buffer));
+        return buffer.toString();
+    }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/util/BeanUtil.java b/src/main/java/com/keyware/composeanalysis/util/BeanUtil.java
new file mode 100644
index 0000000..c44c002
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/util/BeanUtil.java
@@ -0,0 +1,32 @@
+package com.keyware.composeanalysis.util;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.keyware.composeanalysis.solr.VersionTree;
+import com.keyware.composeanalysis.solr.VersionTreeNode;
+import org.apache.solr.common.SolrDocument;
+
+import java.util.List;
+
+/**
+ * @author liuzongren
+ * @date 2024/8/7
+ * @description dom 转 entity
+ */
+public class BeanUtil {
+
+    /**
+     * Convert a Solr document into a {@link VersionTree}, parsing the embedded
+     * "dirTree" field (a JSON array stored as an escaped string) separately.
+     *
+     * NOTE(review): this strips ALL backslashes from the dirTree payload — a file
+     * path legitimately containing a backslash would be corrupted; confirm the
+     * upstream encoding guarantees none occur. The method also mutates the input
+     * document (blanks its "dirTree" field).
+     *
+     * @param dom Solr document with a "dirTree" field; modified in place
+     * @return the populated version tree
+     */
+    public static VersionTree domToVersionTree(SolrDocument dom) {
+        // Un-escape the nested JSON: drop backslashes and the quotes wrapping objects.
+        String dirTree = String.valueOf(dom.get("dirTree"));
+        dirTree = dirTree.replace("\\", "");
+        dirTree = dirTree.replace("\"{", "{");
+        dirTree = dirTree.replace("}\"", "}");
+        // Blank the raw field so the remaining document serializes without it.
+        dom.put("dirTree", null);
+        JSONArray treeArray = JSON.parseArray(dirTree);
+        List<VersionTreeNode> treeList = treeArray.toJavaList(VersionTreeNode.class);
+        // Map the remaining fields onto VersionTree via a JSON round-trip.
+        String domObj = JSON.toJSONString(dom);
+        VersionTree versionTree = JSON.parseObject(domObj, VersionTree.class);
+        versionTree.setDirTree(treeList);
+        return versionTree;
+    }
+
+}
diff --git a/src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java b/src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java
new file mode 100644
index 0000000..44e4fa5
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java
@@ -0,0 +1,38 @@
+package com.keyware.composeanalysis.util;
+
+import com.alibaba.fastjson.JSON;
+import com.mongodb.BasicDBObject;
+import org.bson.Document;
+import org.bson.json.JsonWriterSettings;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/24
+ * 类型转化工具类
+ */
+public class ConvertUtil {
+
+    /**
+     * Deserialize a {@link BasicDBObject} into a bean of the given class via its
+     * JSON representation. (Made static for consistency with the other converters;
+     * calling it through an instance reference still compiles.)
+     */
+    public static <T> T documentToBean(BasicDBObject dbObject, Class<T> clzss) {
+        String realJson = dbObject.toJson(JsonWriterSettings.builder().build());
+        return JSON.parseObject(realJson, clzss);
+    }
+
+    /**
+     * Deserialize a BSON {@link Document} into a bean of the given class.
+     */
+    public static <T> T documentToBean(Document document, Class<T> clzss) {
+        String realJson = document.toJson(JsonWriterSettings.builder().build());
+        return JSON.parseObject(realJson, clzss);
+    }
+
+    /**
+     * Serialize any bean to a {@link BasicDBObject} via JSON.
+     */
+    public static <T> BasicDBObject toDBObject(T object) {
+        String json = JSON.toJSONString(object);
+        return BasicDBObject.parse(json);
+    }
+
+    /**
+     * Serialize any bean to a BSON {@link Document} via JSON.
+     */
+    public static <T> Document beanToDocument(T object) {
+        String json = JSON.toJSONString(object);
+        return Document.parse(json);
+    }
+}
diff --git a/src/main/java/com/keyware/composeanalysis/util/IpUtil.java b/src/main/java/com/keyware/composeanalysis/util/IpUtil.java
new file mode 100644
index 0000000..e299588
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/util/IpUtil.java
@@ -0,0 +1,23 @@
+package com.keyware.composeanalysis.util;
+
+import lombok.extern.log4j.Log4j2;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+/**
+ * @author liuzongren
+ * @date 2024/7/30
+ */
+@Log4j2
+public class IpUtil {
+
+ public static String getHostIp() {
+ try {
+ return InetAddress.getLocalHost().getHostAddress();
+ } catch (UnknownHostException e) {
+ log.error(e.getMessage(), e);
+ }
+ return "127.0.0.1";
+ }
+}
diff --git a/src/main/java/com/keyware/composeanalysis/util/RedisUtil.java b/src/main/java/com/keyware/composeanalysis/util/RedisUtil.java
new file mode 100644
index 0000000..c9b02a8
--- /dev/null
+++ b/src/main/java/com/keyware/composeanalysis/util/RedisUtil.java
@@ -0,0 +1,537 @@
+package com.keyware.composeanalysis.util;
+
+
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.stereotype.Component;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * liuzongren
+ * date 2024/04/02
+ */
+@Component
+public class RedisUtil {
+
+
+ @Autowired
+ private RedisTemplate redisTemplate;
+
+ /**
+ * 给一个指定的 key 值附加过期时间
+ *
+ * @param key
+ * @param time
+ * @return
+ */
+ public boolean expire(String key, long time) {
+ // Sets a TTL in seconds on the key. NOTE(review): RedisTemplate.expire may return
+ // null when run in a pipeline/transaction — auto-unboxing would then NPE; confirm usage.
+ return this.redisTemplate.expire(key, time, TimeUnit.SECONDS);
+ }
+
+ /**
+ * 根据key 获取过期时间
+ *
+ * @param key
+ * @return
+ */
+ public long getTime(String key) {
+ // Remaining TTL of the key in seconds. NOTE(review): getExpire may return null in
+ // pipeline/transaction mode — unboxing would NPE; confirm usage.
+ return redisTemplate.getExpire(key, TimeUnit.SECONDS);
+ }
+
+ /**
+ * Check whether the given key exists.
+ *
+ * @param key
+ * @return
+ */
+ public boolean hasKey(String key) {
+ // True when the key exists in Redis.
+ return redisTemplate.hasKey(key);
+ }
+
+ /**
+ * 移除指定key 的过期时间
+ *
+ * @param key
+ * @return
+ */
+ public boolean persist(String key) {
+ // Removes the TTL from the key, making it persistent.
+ return redisTemplate.boundValueOps(key).persist();
+ }
+
+ //- - - - - - - - - - - - - - - - - - - - - String类型 - - - - - - - - - - - - - - - - - - - -
+
+ /**
+ * 根据key获取值
+ *
+ * @param key 键
+ * @return 值
+ */
+ public Object get(String key) {
+ // Null-safe read of a value key; a null key yields null without hitting Redis.
+ return key == null ? null : redisTemplate.opsForValue().get(key);
+ }
+
+ /**
+ * 将值放入缓存
+ *
+ * @param key 键
+ * @param value 值
+ * @return true成功 false 失败
+ */
+ public void set(String key, Object value) {
+ // Unconditionally stores the value under the key (overwrites any existing value).
+ redisTemplate.opsForValue().set(key, value);
+ }
+
+ /**
+ * 将值放入缓存并设置时间
+ *
+ * @param key 键
+ * @param value 值
+ * @param time 时间(秒) -1为无期限
+ * @return true成功 false 失败
+ */
+ public void set(String key, String value, long time) {
+ // A positive ttl stores the value with an expiry in seconds;
+ // zero or negative stores it without any expiry.
+ if (time > 0) {
+ redisTemplate.opsForValue().set(key, value, time, TimeUnit.SECONDS);
+ } else {
+ redisTemplate.opsForValue().set(key, value);
+ }
+ }
+
+ public void delKey(String key) {
+ // Deletes the key; a no-op when the key is absent.
+ redisTemplate.delete(key);
+ }
+
+ /**
+ * 批量添加 key (重复的键会覆盖)
+ *
+ * @param keyAndValue
+ */
+ public void batchSet(Map keyAndValue) {
+ // Multi-set: stores every entry of the map; existing keys are overwritten.
+ redisTemplate.opsForValue().multiSet(keyAndValue);
+ }
+
+ /**
+ * 批量添加 key-value 只有在键不存在时,才添加
+ * map 中只要有一个key存在,则全部不添加
+ *
+ * @param keyAndValue
+ */
+ public void batchSetIfAbsent(Map keyAndValue) {
+ // All-or-nothing multi-set: if any key already exists, none of the entries are stored.
+ redisTemplate.opsForValue().multiSetIfAbsent(keyAndValue);
+ }
+
+ /**
+ * 对一个 key-value 的值进行加减操作,
+ * 如果该 key 不存在 将创建一个key 并赋值该 number
+ * 如果 key 存在,但 value 不是长整型 ,将报错
+ *
+ * @param key
+ * @param number
+ */
+ public Long increment(String key, long number) {
+ // Atomic add; creates the key when missing. Fails in Redis if the
+ // stored value is not an integer string.
+ return redisTemplate.opsForValue().increment(key, number);
+ }
+
+ /**
+ * 对一个 key-value 的值进行加减操作,
+ * 如果该 key 不存在 将创建一个key 并赋值该 number
+ * 如果 key 存在,但 value 不是 纯数字 ,将报错
+ *
+ * @param key
+ * @param number
+ */
+ public Double increment(String key, double number) {
+ // Atomic floating-point add; fails in Redis if the stored value is not numeric.
+ return redisTemplate.opsForValue().increment(key, number);
+ }
+
+ //- - - - - - - - - - - - - - - - - - - - - set类型 - - - - - - - - - - - - - - - - - - - -
+
+ /**
+ * 将数据放入set缓存
+ *
+ * @param key 键
+ * @return
+ */
+ public void sSet(String key, String value) {
+ // Adds the value to the set stored at the key (duplicates are ignored by Redis).
+ redisTemplate.opsForSet().add(key, value);
+ }
+
+ /**
+ * 获取变量中的值
+ *
+ * @param key 键
+ * @return
+ */
+ public Set