From 0aa44dcca475e78cf42cbdd117bfd9374cfaa952 Mon Sep 17 00:00:00 2001 From: liuzongren <15011502566@163.com> Date: Tue, 10 Sep 2024 17:52:45 +0800 Subject: [PATCH] first commit --- .gitignore | 33 ++ pom.xml | 148 +++++ .../ComposeAnalyzeApplication.java | 21 + .../config/AnalysisConfig.java | 20 + .../composeanalysis/config/RedisConfig.java | 94 +++ .../config/RedissionConfig.java | 31 + .../composeanalysis/config/SolrConfig.java | 25 + .../config/thread/TaskExecutePool.java | 60 ++ .../composeanalysis/constant/FixedValue.java | 340 +++++++++++ .../FunctionAndAnalysisAssemblyConst.java | 26 + .../constant/MongoDBConst.java | 69 +++ .../composeanalysis/constant/RedisConst.java | 20 + .../composeanalysis/constant/SolrDBConst.java | 33 ++ .../constant/enums/AnalysisLevelEnum.java | 48 ++ .../constant/enums/AnalysisStatusEnum.java | 48 ++ .../enums/FileAnalysisStatusEnum.java | 42 ++ .../controller/ComposeAnalysisController.java | 119 ++++ .../composeanalysis/entity/AnalysisTask.java | 157 +++++ .../mapper/AnalyzeTaskMapper.java | 18 + .../mongo/AnalysisLogMongoDto.java | 37 ++ .../mongo/AssemblyMongoDto.java | 96 ++++ .../mongo/FileDataMongoDto.java | 106 ++++ .../mongo/LineDataMongoDto.java | 56 ++ .../composeanalysis/mongo/MatchOpenFile.java | 62 ++ .../mongo/MatchOpenFileMongoDto.java | 58 ++ .../mongo/MatchOpenProjectMongoDto.java | 50 ++ .../mongo/ProjectAssemblyMongoDto.java | 75 +++ .../mongo/ProjectBaseDataMongoDto.java | 75 +++ .../mongo/VersionbasedataMongoDto.java | 75 +++ .../schedule/AnalysisStatusSchedule.java | 68 +++ .../service/AnalysisTaskService.java | 56 ++ .../service/impl/AnalysisTaskServiceImpl.java | 215 +++++++ .../composeanalysis/solr/VersionTree.java | 56 ++ .../composeanalysis/solr/VersionTreeNode.java | 42 ++ .../task/AnalysisTaskFactory.java | 45 ++ .../task/CodeBlockAnalysisTask.java | 356 ++++++++++++ .../task/FileAnalysisTask.java | 232 ++++++++ .../task/FunctionAnalysisTask.java | 409 +++++++++++++ 
.../composeanalysis/task/IAnalysisTask.java | 10 + .../task/LineAnalysisTask.java | 298 ++++++++++ .../task/PorjectAnalysisTask.java | 378 ++++++++++++ .../composeanalysis/util/AnalysisLogUtil.java | 33 ++ .../composeanalysis/util/BeanUtil.java | 32 ++ .../composeanalysis/util/ConvertUtil.java | 38 ++ .../keyware/composeanalysis/util/IpUtil.java | 23 + .../composeanalysis/util/RedisUtil.java | 537 ++++++++++++++++++ .../composeanalysis/util/SimilarityUtil.java | 206 +++++++ .../composeanalysis/util/SolrUtils.java | 321 +++++++++++ .../util/SpringContextUtils.java | 53 ++ src/main/resources/application.yaml | 18 + src/main/resources/logback-spring.xml | 215 +++++++ .../resources/mapper/AnalysisTaskService.xml | 25 + 52 files changed, 5708 insertions(+) create mode 100644 .gitignore create mode 100644 pom.xml create mode 100644 src/main/java/com/keyware/composeanalysis/ComposeAnalyzeApplication.java create mode 100644 src/main/java/com/keyware/composeanalysis/config/AnalysisConfig.java create mode 100644 src/main/java/com/keyware/composeanalysis/config/RedisConfig.java create mode 100644 src/main/java/com/keyware/composeanalysis/config/RedissionConfig.java create mode 100644 src/main/java/com/keyware/composeanalysis/config/SolrConfig.java create mode 100644 src/main/java/com/keyware/composeanalysis/config/thread/TaskExecutePool.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/FixedValue.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/FunctionAndAnalysisAssemblyConst.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/MongoDBConst.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/RedisConst.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/SolrDBConst.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisLevelEnum.java create mode 100644 
src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisStatusEnum.java create mode 100644 src/main/java/com/keyware/composeanalysis/constant/enums/FileAnalysisStatusEnum.java create mode 100644 src/main/java/com/keyware/composeanalysis/controller/ComposeAnalysisController.java create mode 100644 src/main/java/com/keyware/composeanalysis/entity/AnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/mapper/AnalyzeTaskMapper.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/AnalysisLogMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/AssemblyMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/FileDataMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/LineDataMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/MatchOpenProjectMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java create mode 100644 src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java create mode 100644 src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java create mode 100644 src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java create mode 100644 src/main/java/com/keyware/composeanalysis/solr/VersionTree.java create mode 100644 src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java create mode 100644 src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java create mode 100644 
src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/task/IAnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/BeanUtil.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/IpUtil.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/RedisUtil.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/SolrUtils.java create mode 100644 src/main/java/com/keyware/composeanalysis/util/SpringContextUtils.java create mode 100644 src/main/resources/application.yaml create mode 100644 src/main/resources/logback-spring.xml create mode 100644 src/main/resources/mapper/AnalysisTaskService.xml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..549e00a --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +HELP.md +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ + +### STS ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..d61784f 
--- /dev/null +++ b/pom.xml @@ -0,0 +1,148 @@ + + + 4.0.0 + + com.keyware + keyware-cloud + 1.0.0 + + + com.keyware + compose-analysis + 1.0.0 + compose-analysis + compose-analysis + + + 17 + + + + + org.springframework.boot + spring-boot-starter-web + + + + + com.alibaba.cloud + spring-cloud-starter-alibaba-nacos-discovery + + + + + com.alibaba.cloud + spring-cloud-starter-alibaba-nacos-config + + + + + org.springframework.boot + spring-boot-starter-data-mongodb + + + + + + org.springframework.boot + spring-boot-starter-data-redis + + + + + org.redisson + redisson-spring-boot-starter + 3.13.6 + + + + + org.apache.solr + solr-solrj + 7.6.0 + + + org.apache.zookeeper + zookeeper + + + + + + + com.keyware + keyswan-analysis + releases-1.1.5 + + + + com.keyware + keyswan-function + release-1.1.2 + + + + + com.keyware + keyware-common + 1.0.0 + + + + + com.keyware + compose-analysis-api + 1.0.0 + + + + + + + + keyware-repos + KeyWare Repository + http://218.30.67.85:19201/nexus/content/groups/public/ + + + keyware-repos-2 + KeyWare Repository-2 + http://218.30.67.85:19201/nexus/content/repositories/releases/ + + + + + + + + + + + + + + + + + + + + + + + org.springframework.boot + spring-boot-maven-plugin + 3.2.7 + + + org.apache.maven.plugins + maven-compiler-plugin + + 16 + 16 + + + + + diff --git a/src/main/java/com/keyware/composeanalysis/ComposeAnalyzeApplication.java b/src/main/java/com/keyware/composeanalysis/ComposeAnalyzeApplication.java new file mode 100644 index 0000000..ad3071a --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/ComposeAnalyzeApplication.java @@ -0,0 +1,21 @@ +package com.keyware.composeanalysis; + +import org.mybatis.spring.annotation.MapperScan; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.cloud.client.discovery.EnableDiscoveryClient; +import org.springframework.cloud.context.config.annotation.RefreshScope; +import 
org.springframework.scheduling.annotation.EnableAsync; + +@MapperScan("com.keyware.composeanalysis.mapper") +@SpringBootApplication +@EnableDiscoveryClient +@RefreshScope +@EnableAsync +public class ComposeAnalyzeApplication { + + public static void main(String[] args) { + SpringApplication.run(ComposeAnalyzeApplication.class, args); + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/config/AnalysisConfig.java b/src/main/java/com/keyware/composeanalysis/config/AnalysisConfig.java new file mode 100644 index 0000000..e42e5b5 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/config/AnalysisConfig.java @@ -0,0 +1,20 @@ +package com.keyware.composeanalysis.config; + +import lombok.Data; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Configuration; + +/** + * @author liuzongren + * @date 2024/7/25 + * @description 分析配置类 + */ +@Configuration +@Data +public class AnalysisConfig { + + //源码上传和解压的地址 + @Value("${codeResourcePath}") + private String codeResourcePath; + +} diff --git a/src/main/java/com/keyware/composeanalysis/config/RedisConfig.java b/src/main/java/com/keyware/composeanalysis/config/RedisConfig.java new file mode 100644 index 0000000..95733b7 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/config/RedisConfig.java @@ -0,0 +1,94 @@ +package com.keyware.composeanalysis.config; + +import org.redisson.Redisson; +import org.redisson.api.RedissonClient; +import org.redisson.codec.JsonJacksonCodec; +import org.redisson.config.Config; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.data.redis.connection.RedisConnectionFactory; +import org.springframework.data.redis.connection.RedisStandaloneConfiguration; +import org.springframework.data.redis.connection.lettuce.LettuceConnectionFactory; +import 
org.springframework.data.redis.core.RedisTemplate; +import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer; +import org.springframework.data.redis.serializer.StringRedisSerializer; + +@Configuration +public class RedisConfig { + + @Value("${spring.data.redis.host}") + private String redisHost; + + @Value("${spring.data.redis.port}") + private int redisPort; + + //配置文件中的密码 到这里已经被转义过了 + @Value("${spring.data.redis.password}") + private String redisPassword; + + @Bean(name = "redisTemplate") + public RedisTemplate getRedisTemplate(RedisConnectionFactory factory) { + RedisTemplate template = new RedisTemplate(); + template.setConnectionFactory(factory); + //配置序列化方式 + Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class); + + StringRedisSerializer stringRedisSerializer = new StringRedisSerializer(); + //key 采用String的序列化方式 + template.setKeySerializer(stringRedisSerializer); + //hash + template.setHashKeySerializer(jackson2JsonRedisSerializer); + //value + template.setValueSerializer(jackson2JsonRedisSerializer); + template.afterPropertiesSet(); + return template; + } + + + @Bean + public RedissonClient getRedisson() { + Config config = new Config(); + config.useSingleServer(). + setAddress("redis://" + redisHost + ":" + redisPort). 
+ setPassword(redisPassword); + config.setCodec(new JsonJacksonCodec()); + return Redisson.create(config); + } + + + @Bean(name = "oneDBRedisTemplateClient") + public RedisTemplate redisTemplate() { + //为了开发方便,一般直接使用 + RedisTemplate template = new RedisTemplate<>(); + + template.setConnectionFactory(redisConnection(1)); + //配置序列化方式 + Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class); + + StringRedisSerializer stringRedisSerializer = new StringRedisSerializer(); + //key 采用String的序列化方式 + template.setKeySerializer(stringRedisSerializer); + //hash + template.setHashKeySerializer(jackson2JsonRedisSerializer); + //value + template.setValueSerializer(jackson2JsonRedisSerializer); + template.afterPropertiesSet(); + return template; + } + + private LettuceConnectionFactory redisConnection(int db) { + RedisStandaloneConfiguration server = new RedisStandaloneConfiguration(); + server.setHostName(redisHost); + server.setDatabase(db); + server.setPort(redisPort); + server.setPassword(redisPassword); + LettuceConnectionFactory factory = new LettuceConnectionFactory(server); + factory.afterPropertiesSet(); + return factory; + } + + + + +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/config/RedissionConfig.java b/src/main/java/com/keyware/composeanalysis/config/RedissionConfig.java new file mode 100644 index 0000000..c0e17f5 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/config/RedissionConfig.java @@ -0,0 +1,31 @@ +package com.keyware.composeanalysis.config; + +import org.redisson.Redisson; +import org.redisson.api.RBucket; +import org.redisson.api.RedissonClient; +import org.redisson.codec.JsonJacksonCodec; +import org.redisson.config.Config; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * @author liuzongren + * @date 2024/7/30 + */ 
+@Configuration +public class RedissionConfig { + + @Value("${spring.data.redis.host}") + private String redisHost; + + @Value("${spring.data.redis.port}") + private int redisPort; + + @Value("${spring.data.redis.password}") + private String redisPassword; + + + + +} diff --git a/src/main/java/com/keyware/composeanalysis/config/SolrConfig.java b/src/main/java/com/keyware/composeanalysis/config/SolrConfig.java new file mode 100644 index 0000000..3902abc --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/config/SolrConfig.java @@ -0,0 +1,25 @@ +package com.keyware.composeanalysis.config; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +/** + * @author liuzongren + * @date 2024/7/24 + * description solr的配置中心 + */ +@Component +@ConfigurationProperties(prefix = "solr") +public class SolrConfig { + + /** + * 每次查询返回的最大行数 + */ + @Value("${solr.row:5}") + private String ROWS; + + @Value("${solr.solrUrl}") + private String solrUrl; + +} diff --git a/src/main/java/com/keyware/composeanalysis/config/thread/TaskExecutePool.java b/src/main/java/com/keyware/composeanalysis/config/thread/TaskExecutePool.java new file mode 100644 index 0000000..02a716f --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/config/thread/TaskExecutePool.java @@ -0,0 +1,60 @@ +package com.keyware.composeanalysis.config.thread; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +import java.util.concurrent.Executor; +import java.util.concurrent.ThreadPoolExecutor; + +/** + * 创建线程池配置类 + */ +@Configuration +public class TaskExecutePool { + /** + * 核心线程数 + */ + private int coreThreadsSize = 10; + + /** + * 最大线程数 + */ + private int maxThreadsSize = 50; + + /** + * 存活时间 + */ + private int keepAliveSeconds = 
/**
 * Fixed lookup tables used by the analysis code (original description: "common language cache
 * identifiers"): archive extensions, licence advice texts, and file-extension to language /
 * Solr core name mappings.
 *
 * <p>The four extension tables are populated from one ordered master table so they cannot
 * drift apart. All collections remain mutable, exactly as before.
 */
public class FixedValue {

    /** Suffix appended to a language name to form its Solr feature core name. */
    private static final String CUT_FILE_CORE_SUFFIX = "_CutFileInfo";

    /** Suffix appended to a language name to form its Solr source-file core name. */
    private static final String SOURCE_FILE_CORE_SUFFIX = "_SourceFileBase";

    /**
     * Master table, one row per source extension:
     * {@code {extension without dot, SUFFIX_LANG value, SUFFIX_LANG1 value}}.
     * Row order is the insertion order used for every derived map.
     */
    private static final String[][] SOURCE_LANGUAGES = {
        {"java", "Java", "java"},
        {"c", "C", "c"},
        {"h", "C", "c"},
        {"cpp", "Cpp", "c++"},
        {"hpp", "Cpp", "c++"},
        {"cs", "Cs", "c#"},
        {"m", "OC", "Objective-C"},
        {"mm", "OC", "Objective-C"},
        {"py", "Python", "python"},
        {"go", "Golang", "go"},
        {"pl", "Perl", "perl"},
        {"rb", "Ruby", "ruby"},
        {"php", "PHP", "php"},
        {"sql", "Plsql", "plsql"},
        {"abap", "Abap", "abap"},
        {"lua", "Lua", "lua"},
        {"erl", "Erlang", "erlang"},
        {"swift", "Swift", "swift"},
        {"groovy", "Groovy", "groovy"},
        {"frm", "VB", "vb"},
        {"bas", "VB", "vb"},
        {"cls", "VB", "vb"},
        {"ctl", "VB", "vb"},
        {"vb", "VB", "vb"},
        {"vbs", "VB", "vb"},
        {"pp", "Puppet", "puppet"},
        {"clj", "Clojure", "clojure"},
        {"fs", "F", "f"},
        {"fsx", "F", "f"},
        {"fsscript", "F", "f"},
        {"hs", "Haskell", "haskell"},
        {"js", "Javascript", "javaScript"},
        {"ts", "Typescript", "typeScript"},
        {"r", "R", "r"},
        {"R", "R", "r"},
        {"sc", "Scala", "scala"},
        {"scala", "Scala", "scala"},
        {"pas", "Pascal", "pascal"},
        {"cob", "Cobol", "cobol"},
        {"as", "ActionScript", "actionScript"},
        {"rs", "Rust", "rust"},
        {"ino", "Arduino", "arduino"},
        {"asm", "Assembly", "assembly"},
        {"f", "Fortran", "fortran"},
        {"f90", "Fortran", "fortran"},
        {"sh", "Shell", "shell"},
        {"html", "Html", "html"},
        {"htm", "Html", "html"},
        {"css", "Css", "css"},
        {"rpg", "Rpg", "rpg"},
        {"xml", "Xml", "xml"},
        {"pli", "Pli", "pli"},
    };

    /** Extensions (with leading dot) treated as compressed/packaged formats. */
    public static final Set<String> COMPRESSED_FORMAT = new HashSet<>();

    static {
        COMPRESSED_FORMAT.add(".exe");
        COMPRESSED_FORMAT.add(".msi");
        COMPRESSED_FORMAT.add(".zip");
        COMPRESSED_FORMAT.add(".rar");
        COMPRESSED_FORMAT.add(".tar");
        COMPRESSED_FORMAT.add(".jar");
        COMPRESSED_FORMAT.add(".war");
        COMPRESSED_FORMAT.add(".tar.gz");
        COMPRESSED_FORMAT.add(".gz");
        COMPRESSED_FORMAT.add(".tar.bz2");
        COMPRESSED_FORMAT.add(".bz2");
        COMPRESSED_FORMAT.add(".tar.z");
        COMPRESSED_FORMAT.add(".z");
        COMPRESSED_FORMAT.add(".tgz");
        COMPRESSED_FORMAT.add(".7z");
        COMPRESSED_FORMAT.add(".xz");
    }

    /** Licence name mapped to the detection-result text shown for that licence. */
    public static final Map<String, String> LINCEN_RESULT = new HashMap<>();

    static {
        LINCEN_RESULT.put("BSD", "1. 如果再发布的产品中包含源代码,则在源代码中必须带有原来代码中的BSD协议。\n"
                + "2. 如果再发布的只是二进制类库/软件,则需要在类库/软件的文档和版权声明中包含原来代码中的BSD协议。\n"
                + "3. 不可以用开源代码的作者/机构名字和原来产品的名字做市场推广。");
        LINCEN_RESULT.put("Apache License", "1. 需要给代码的用户一份Apache Licence\n"
                + "2. 如果你修改了代码,需要再被修改的文件中说明。\n"
                + "3. 在延伸的代码中(修改和有源代码衍生的代码中)需要带有原来代码中的协议,商标,专利声明和其他原来作者规定需要包含的说明。\n"
                + "4. 如果再发布的产品中包含一个Notice文件,则在Notice文件中需要带有Apache Licence。你可以在Notice中增加自己的许可,但不可以表现为对Apache Licence构成更改。");
        LINCEN_RESULT.put("GNU General Public License", "不允许修改后和衍生的代码做为闭源的商业软件发布和销售");
        LINCEN_RESULT.put("GNU Lesser General Public License", "可以被商业软件作为类库引用并发布和销售");
        LINCEN_RESULT.put("MIT", "必须在你的发行版里包含原许可协议的声明");
    }

    /** Extension (with leading dot) mapped to the language type name. */
    public static final Map<String, String> SUFFIX_LANG = new HashMap<>();

    /** Extension (with leading dot) mapped to the alternative spelling of the language name. */
    public static final Map<String, String> SUFFIX_LANG1 = new HashMap<>();

    /** Extension (no dot) mapped to the name of the Solr feature core for that language. */
    public static final Map<String, String> SUFFIX_SOLR_FILE = new HashMap<>();

    /** Extension (no dot) mapped to the name of the Solr source-file core for that language. */
    public static final Map<String, String> SUFFIX_SOLR_VERSION = new HashMap<>();

    static {
        for (String[] row : SOURCE_LANGUAGES) {
            String extension = row[0];
            String language = row[1];
            SUFFIX_LANG.put("." + extension, language);
            SUFFIX_LANG1.put("." + extension, row[2]);
            SUFFIX_SOLR_FILE.put(extension, language + CUT_FILE_CORE_SUFFIX);
            SUFFIX_SOLR_VERSION.put(extension, language + SOURCE_FILE_CORE_SUFFIX);
        }
        // OpenEdge appears only in the two language maps; it has no Solr core entries.
        SUFFIX_LANG.put(".p", "OpenEdge");
        SUFFIX_LANG.put(".abl", "OpenEdge");
        SUFFIX_LANG1.put(".p", "OpenEdge");
        SUFFIX_LANG1.put(".abl", "OpenEdge");
    }

    /** Binary file extension (with leading dot) mapped to its language. */
    public static final Map<String, String> SUFFIX_BIN = new HashMap<>();

    static {
        SUFFIX_BIN.put(".class", "java");
        SUFFIX_BIN.put(".dll", "c++");
    }

    /** Binary file extension (with leading dot) mapped to its Solr version-tree core name. */
    public static final Map<String, String> SUFFIX_BIN_SOLR = new HashMap<>();

    static {
        SUFFIX_BIN_SOLR.put(".class", "mavenBinaryVersionTree");
        SUFFIX_BIN_SOLR.put(".dll", "nugetBinaryVersionTree");
    }

    /** Extension (with leading dot) mapped to a display language name for the C family. */
    public static final Map<String, String> SUFFIX_TFILE = new HashMap<>();

    static {
        SUFFIX_TFILE.put(".c", "C");
        SUFFIX_TFILE.put(".cc", "C++");
        SUFFIX_TFILE.put(".cpp", "C++");
        SUFFIX_TFILE.put(".cs", "C#");
    }

    /** Severity label mapped to a numeric rank (HIGH = 2, MEDIUM = 1, LOW = 0). */
    public static final Map<String, Integer> CVE_LEVE = new HashMap<>();

    static {
        CVE_LEVE.put("HIGH", 2);
        CVE_LEVE.put("MEDIUM", 1);
        CVE_LEVE.put("LOW", 0);
    }

    /** Maximum number of analyses the system allows to run. */
    public static int MAX_ANALYSIS_SIZE = 0;

    /** Maximum number of waiting tasks the system allows. */
    public static int MAX_WAIT_SIZE = 0;

    /** Number of tasks currently executing. */
    public static int IN_PROGRESS_NUM = 0;

}
String TABLE_NAME_FILE_DATA = "file_data"; + + /** + * match_open_file 库 用于存储 匹配到的开源项目的信息 + */ + String TABLE_NAME_MATCH_OPEN_FILE = "match_open_file"; + + //mongodb line_data数据库 + String DB_TABLE_NAME_LINE_DATA = "line_data"; + + + // file_data 库 isAnalyze 分析状态字段 + String ANALYSIS_STATUS = "isAnalyze"; + + + // file_data 库 isParent 是否是目录 标识 + String IS_DIR = "isParent"; + + + // file_data 库 assFlag 组件分析是否完成 标识 + String ASS_FLAG = "assFlag"; + + // file_data 库 isSelect 0:完成文件解压,未进行任何提取 1:进入文件级特征提取 2:进入行级特征提取 3:进入成分分析特征提取 + //match_open_file isSelect 0 初始状态 1 代表已经查询过 + //line_hay 0:进入行级特征提取 + String IS_SELECT = "isSelect"; + + +} diff --git a/src/main/java/com/keyware/composeanalysis/constant/RedisConst.java b/src/main/java/com/keyware/composeanalysis/constant/RedisConst.java new file mode 100644 index 0000000..75e512b --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/constant/RedisConst.java @@ -0,0 +1,20 @@ +package com.keyware.composeanalysis.constant; + +/** + * @author liuzongren + * @date 2024/7/31 + * @description redis key 常量池 + */ +public interface RedisConst { + + /** + * 分析任务,全局分布式锁前缀 + */ + String TASK_LOCK_KEY_PREFIX = "ANALYSIS_TASK_LOCK_ID_%s"; + + + /** + * 分析任务,运行状态前缀 + */ + String TASK_RUNNING_STATUS_KEY_PREFIX = "ANALYSIS_TASK_RUNNING_STATUS_%s"; +} diff --git a/src/main/java/com/keyware/composeanalysis/constant/SolrDBConst.java b/src/main/java/com/keyware/composeanalysis/constant/SolrDBConst.java new file mode 100644 index 0000000..194389b --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/constant/SolrDBConst.java @@ -0,0 +1,33 @@ +package com.keyware.composeanalysis.constant; + + +/** + * @Author liuzongren + * @Description solrDB 数据库 常量 + * @Date 2024/7/24 + * @Param + * @return + **/ +public interface SolrDBConst { + + + /** + * solr versionTree 数据库名称 , 版本树 存储 开源项目的版本信息以及各个版本的文件目录信息 + */ + String VERSION_TREE = "versionTree"; + + + /** + * solr _SourceFileInfo 数据库后缀 , 开源项目-源代码信息 ,根据源文件MD5值的第一位进行区分 + */ + String 
CORE_NAME_SUFFIX_SOURCE_FILE_INFO = "_SourceFileInfo"; + + + /** + * solr SourceFileInfoTemp , 存储文件行特征信息 + */ + String CORE_NAME_SOURCE_FILE_INFO_TEMP = "SourceFileInfoTemp"; + + + +} diff --git a/src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisLevelEnum.java b/src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisLevelEnum.java new file mode 100644 index 0000000..f08a065 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisLevelEnum.java @@ -0,0 +1,48 @@ +package com.keyware.composeanalysis.constant.enums; + + +/** + * 分析等级枚举对象 + */ +public enum AnalysisLevelEnum { + //数据库的类型 还没有进行 更改,后面修改数据库 这里需要进行变更 + + //检测特征级别:0文件,1函数 2:代码块 3:行 + + FILE_LEVEL("文件级", 0), + + FUNCTION_LEVEL("函数级", 1), + + BLOCK_LEVEL("代码块级", 2), + + LINE_LEVEL("行级", 3); + + // 状态 + private String status; + //状态码 + private Integer code; + + AnalysisLevelEnum(String status, Integer code) { + this.status = status; + this.code = code; + } + + public String getStatus() { + return status; + } + + public Integer getCode() { + return code; + } + + public static AnalysisLevelEnum getAnalysisLevelEnum(Integer code){ + for (AnalysisLevelEnum analysisLevelEnum : AnalysisLevelEnum.values()) { + if(analysisLevelEnum.getCode().equals(code)){ + return analysisLevelEnum; + } + } + return null; + } + + +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisStatusEnum.java b/src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisStatusEnum.java new file mode 100644 index 0000000..bf87e3c --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisStatusEnum.java @@ -0,0 +1,48 @@ +package com.keyware.composeanalysis.constant.enums; + + +/** + * 分析状态枚举类 + */ + +public enum AnalysisStatusEnum { + + //0:未分析 1:正在分析 2:分析完成 3:暂停分析 4:等待 5:开始分析 6:终止分析 7:分析失败 + + UN_ANALYSIS("未分析",0), + + ANALYSISING("正在分析",1), + + ANALYSIS_DONE("分析完成",2), + + PAUSE_ANALYSIS("暂停分析",3), + + 
WAIT_ANALYSIS("等待分析",4), + + START_ANALYSIS("开始分析",5), + + STOP_ANALYSIS("终止分析",6), + + FAIL_ANALYSIS("分析失败",7); + + // 状态 + private String status; + //状态码 + private Integer code; + + AnalysisStatusEnum(String status, Integer code) { + this.status = status; + this.code = code; + } + + public String getStatus() { + return status; + } + + public Integer getCode() { + return code; + } + + + +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/constant/enums/FileAnalysisStatusEnum.java b/src/main/java/com/keyware/composeanalysis/constant/enums/FileAnalysisStatusEnum.java new file mode 100644 index 0000000..3b7b7ee --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/constant/enums/FileAnalysisStatusEnum.java @@ -0,0 +1,42 @@ +package com.keyware.composeanalysis.constant.enums; + + +/** + * 文件分析状态枚举类 + */ + +public enum FileAnalysisStatusEnum { + + //旧的文件分析状态(0:未分析;1:文件级已分析完成 2:文件级 3:行级别特征提取完成 4:分析失败 5:暂停分析 6:成分分析完成) + + UN_START_ANALYSIS("未开始分析",0), + + FILE_ANALYSIS_DONE("文件级已分析完成",1), + + FAILED_ANALYSIS("分析失败",2), + + PAUSE_ANALYSIS("暂停分析",3), + + ANALYSIS_DONE("成分分析完成",4); + + // 状态 + private String status; + //状态码 + private Integer code; + + FileAnalysisStatusEnum(String status, Integer code) { + this.status = status; + this.code = code; + } + + public String getStatus() { + return status; + } + + public Integer getCode() { + return code; + } + + + +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/controller/ComposeAnalysisController.java b/src/main/java/com/keyware/composeanalysis/controller/ComposeAnalysisController.java new file mode 100644 index 0000000..392e99d --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/controller/ComposeAnalysisController.java @@ -0,0 +1,119 @@ +package com.keyware.composeanalysis.controller; + +import com.keyware.common.constant.RedisConst; +import com.keyware.composeanalysis.api.ComposeAnalysisApi; +import 
com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.response.AnalysisResp; +import com.keyware.composeanalysis.service.AnalysisTaskService; +import com.keyware.composeanalysis.util.IpUtil; +import jakarta.annotation.Resource; +import lombok.extern.log4j.Log4j2; +import org.redisson.api.RLock; +import org.redisson.api.RedissonClient; +import org.springframework.web.bind.annotation.RestController; + +/** + * @author liuzongren + * @date 2024/7/30 + */ +@Log4j2 +@RestController +public class ComposeAnalysisController implements ComposeAnalysisApi { + + @Resource + private AnalysisTaskService taskService; + + @Resource + private RedissonClient redissonClient; + + @Override + public AnalysisResp startComposeAnalysisTask(String taskId) { + AnalysisResp result = new AnalysisResp(); + result.setNodeIp(IpUtil.getHostIp()); + RLock lock = redissonClient.getLock(String.format(RedisConst.TASK_LOCK_KEY_PREFIX, taskId)); + try { + //执行任务前 首先获取当前任务的锁,防止多节点并发分析同一任务 + if (lock.tryLock()) { + AnalysisTask analysisTask = taskService.getById(taskId); + + //校验任务是否存在 + if (analysisTask == null) { + result.setCode(202); + result.setResponseMsg("当前任务不存在"); + return result; + } + + if (analysisTask.getAnalysisStatus().equals(AnalysisStatusEnum.ANALYSIS_DONE.getCode())) { + result.setCode(202); + result.setResponseMsg("任务已分析完成,如需重新分析,请点击重新分析"); + return result; + } + + //执行成分分析任务 + result.setCode(200); + taskService.doComposeAnalyze(analysisTask); + result.setResponseMsg("任务执行成功"); + } + } catch (Exception e) { + result.setCode(500); + result.setResponseMsg("任务执行失败"); + log.error("任务执行失败", e); + lock.unlock(); + }finally { + lock.unlock(); + } + return result; + } + + @Override + public void stopComposeAnalysisTask(String taskId) { + taskService.stopComposeAnalysisTask(taskId); + } + + @Override + public AnalysisResp restartComposeAnalysisTask(String taskId) { + AnalysisResp result = new 
AnalysisResp(); + result.setNodeIp(IpUtil.getHostIp()); + AnalysisTask analysisTask = taskService.getById(taskId); + + //校验任务是否存在 + if (analysisTask == null) { + result.setCode(202); + result.setResponseMsg("当前任务不存在"); + return result; + } + + Boolean isRestart = taskService.restartComposeAnalysisTask(taskId); + + if (isRestart) { + result.setCode(200); + result.setResponseMsg("任务重启成功"); + } else { + result.setCode(202); + result.setResponseMsg("任务重启失败"); + } + return result; + } + + @Override + public AnalysisResp recoveryComposeAnalysisTask(String taskId) { + AnalysisResp result = new AnalysisResp(); + result.setNodeIp(IpUtil.getHostIp()); + AnalysisTask analysisTask = taskService.getById(taskId); + + //校验任务是否存在 + if (analysisTask == null) { + result.setCode(202); + result.setResponseMsg("当前任务不存在"); + return result; + } + taskService.recoveryComposeAnalysisTask(analysisTask); + + result.setCode(200); + result.setResponseMsg("任务恢复成功"); + return result; + } + + +} diff --git a/src/main/java/com/keyware/composeanalysis/entity/AnalysisTask.java b/src/main/java/com/keyware/composeanalysis/entity/AnalysisTask.java new file mode 100644 index 0000000..bc09880 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/entity/AnalysisTask.java @@ -0,0 +1,157 @@ +package com.keyware.composeanalysis.entity; + +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.experimental.Accessors; + +import java.io.Serializable; +import java.util.Date; + +/** + *

+ * 成分分析任务 实体对象 + *

+ * + * @author liuzongren + * @since 2024-07-23 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Accessors(chain = true) +@TableName("analysis_task") +public class AnalysisTask implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * 逻辑主键ID,UUID + */ + @TableId("id") + private String id; + + /** + * 被测件的名称(上传文件的名称) + */ + @TableField("file_name") + private String fileName; + + /** + * 被测件版本 + */ + @TableField("version") + private String version; + + + /** + * 成分分析等级 + * 检测特征级别:0文件,1函数 2:代码块 3:行 + */ + @TableField("analysis_level") + private Integer analysisLevel; + + /** + * 开源比例阈值,当开源比例超过此阈值,则判断当前文件开源 + */ + @TableField("open_rate_threshold") + private Integer openRateThreshold; + + /** + * 当前被测件是否是开源的 + */ + @TableField("open_type") + private Boolean openType; + + /** + * 被测件的md5值 + */ + @TableField("md5") + private String md5; + + + /** + * 被测件的文件总数 + */ + @TableField("file_count") + private Integer fileCount; + + + /** + * 成分分析的状态 + */ + @TableField("analysis_status") + private Integer analysisStatus; + + /** + * 分析开始时间 + */ + @TableField("analysis_start_time") + private Date analysisStartTime; + + /** + * 分析结束时间 + */ + @TableField("analysis_end_time") + private Date analysisEndTime; + + /** + * 成分分析是否完成(0:未完成 1:分析中 2:已完成) + */ + @TableField("compose_flag") + private Integer composeFlag; + + /** + * 组件分析是否完成(0:未完成 1:分析中 2:已完成) + */ + @TableField("assembly_flag") + private Integer assemblyFlag; + + /** + * 漏洞分析是否完成(0:未完成 1:分析中 2:已完成) + */ + @TableField("hold_flag") + private Integer holdFlag; + + /** + * 许可证分析是否完成(0:未完成 1:分析中 2:已完成) + */ + @TableField("licence_flag") + private Integer licenceFlag; + + /** + * 文件解压缩是否完成(false:未解压 2:true) + */ + @TableField("decompression_flag") + private Boolean decompressionFlag = false; + + /** + * 任务创建时间 + */ + @TableField("create_time") + private Date createTime; + + /** + * 任务创建ID + */ + @TableField("create_user_id") + private String createUserId; + + /** + * 分析总耗时 + * 格式为:时-分-秒 + */ 
+ @TableField(exist = false) + private String analysisUsedTime; + + /** + * 分析进度 + * 100% + */ + @TableField(exist = false) + private String analysisProgress; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mapper/AnalyzeTaskMapper.java b/src/main/java/com/keyware/composeanalysis/mapper/AnalyzeTaskMapper.java new file mode 100644 index 0000000..5106e09 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mapper/AnalyzeTaskMapper.java @@ -0,0 +1,18 @@ +package com.keyware.composeanalysis.mapper; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.keyware.composeanalysis.entity.AnalysisTask; +import org.apache.ibatis.annotations.Mapper; +import org.apache.ibatis.annotations.Param; + +import java.io.Serializable; +import java.util.List; + +/** + * @author liuzongren + * @since 2024-07-23 + */ +@Mapper +public interface AnalyzeTaskMapper extends BaseMapper { + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/AnalysisLogMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/AnalysisLogMongoDto.java new file mode 100644 index 0000000..eec6800 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/AnalysisLogMongoDto.java @@ -0,0 +1,37 @@ +package com.keyware.composeanalysis.mongo; + +import cn.hutool.core.date.DateTime; +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.io.Serializable; + +/** + * @author liuzongren + * @ClassName AnalysisLogMongoDto + * @description: 分析过程日志记录 + * @datetime 2024年 07月 23日 18:05 + * @version: 1.0 + */ +@Data +@Accessors(chain =true) +@Document(collection = "analysis_log") +public class AnalysisLogMongoDto implements Serializable { + + + @Id + private String id; + + /** + * 日志内容 + */ + private String logInfo; + + /** + * 创建时间 + */ + private DateTime createTime; + +} diff --git 
a/src/main/java/com/keyware/composeanalysis/mongo/AssemblyMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/AssemblyMongoDto.java new file mode 100644 index 0000000..b63b36e --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/AssemblyMongoDto.java @@ -0,0 +1,96 @@ +package com.keyware.composeanalysis.mongo; + +import lombok.Data; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.util.ArrayList; +import java.util.List; + + +/** + * AssemblyMongoDto 类,用于封装 MongoDB 中的装配数据文档。 + * 此类映射到名为 "assembly_data" 的 MongoDB 集合。 + * + * @author liuzongren + * @date 2024/7/9 + */ +@Document(collection = "assembly_data") +@Data +public class AssemblyMongoDto { + + /** + * MongoDB 文档的唯一标识符。 + */ + @Id + private String id; + + /** + * 项目引用文件的名称。 + */ + private String fileName; + + /** + * 匹配到的开源组件名称。 + */ + private String assemblyName; + + /** + * 匹配到的开源组件版本号。 + */ + private String assemblyVersion; + + /** + * 组件的来源 URL。 + */ + private String url; + + /** + * 开源率,描述组件开源程度的指标。 + */ + private String semblance; + + /** + * 许可证列表,描述组件的许可协议。 + */ + private List license; + + /** + * 任务 ID,与分析任务关联。 + */ + private String testFileId; + + /** + * 文件 ID,与特定文件关联。 + */ + private String fileId; + + /** + * 父节点编号,用于构建树形结构。 + */ + private int parentNode; + + /** + * 节点所在的层次。 + */ + private int layer; + + /** + * 当前节点的编号。 + */ + private int node; + + /** + * 子节点集合,用于构建树形结构。 + */ + private List children = new ArrayList<>(); + + /** + * 添加子节点的方法。 + * + * @param child 要添加的子节点 + */ + public void addChild(AssemblyMongoDto child) { + children.add(child); + } +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/mongo/FileDataMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/FileDataMongoDto.java new file mode 100644 index 0000000..20919c9 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/FileDataMongoDto.java @@ -0,0 +1,106 @@ +package 
com.keyware.composeanalysis.mongo; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.io.Serializable; +import java.util.Date; + +@Document(collection = "file_data") +@Data +@Accessors(chain =true) +public class FileDataMongoDto implements Serializable { + + @Id + private String id; + + //文件名称 + private String name; + + //文件的MD5 + private String md5; + + /** + * 场景1: 文件级别匹配 :成功后 赋值 + */ + //文件特征MD5 + @Deprecated + private String cutMd5; + + //文件大小, 单位是字节 + private Integer fileSize; + + //代码行数 + private Integer codeRowNum; + + //函数个数,目测无用,后期测试删除 + @Deprecated + private Integer funCount; + + //父级目录ID + private String pId; + + //直接获取的文件hash,暂时无用 + @Deprecated + private String sourceMd5; + + //语言 + @Deprecated + private String lang; + + //标识军用代码(1:军用代码;2:民用代码) + @Deprecated + private String militaryType; + + //是否是目录(0:false; 1:true) + private Boolean isDirectory; + + //文件分析状态(0:未分析;1:文件级已分析同时匹配到数据 2:文件级未匹配到需要行级别提取 3:行级别提取完成 4:分析失败 5:暂停分析 6:成分分析完成) + private Integer fileAnalysisStatus; + + //关联的文件ID + @Deprecated + private String testFileId; + + //创建时间 + private Date createTime; + + //文件地址 + private String fileUrl; + + //文件后缀 + private String suffix; + + //组件分析是否完成(0:否 1:是) + private String assFlag; + + //许可证分析是否完成(0:否 1:是) + private String licenceFlag; + + //漏洞分析是否完成(0:否 1:是) + private String holdFlag; + + //文件开源率 + private Float openRate = 0f; + + /** + * 场景1: 文件级别匹配 : 使用 文件特征MD5 或者 源文件md5匹配中 文件匹配成功后,直接设置为文件的总行数 + */ + //文件开源代码行数 + private Integer openLineCount = 0; + + //文件开源类型 (false:自研 true:开源) + private Boolean openType = false; + + //当前文件是否在分析中 + //充当文件分析状态的第二个字段,0:刚刚解压完成 + /** + * 场景1:刚刚解压完成 赋值为 0 + * 场景2: 文件级分析完,但是没有分析到结果,还需要进行下一级的分析,赋值为 1 + */ + @Deprecated + private String isSelect; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/LineDataMongoDto.java 
b/src/main/java/com/keyware/composeanalysis/mongo/LineDataMongoDto.java new file mode 100644 index 0000000..deb5077 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/LineDataMongoDto.java @@ -0,0 +1,56 @@ +package com.keyware.composeanalysis.mongo; + +import com.keyware.keyswan.common.LineModel; +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.util.List; + +/** + * @author liuzongren + * @date 2024/7/25 + */ +@Document(collection = "line_data") +@Data +@Accessors(chain =true) +public class LineDataMongoDto { + + /** + * MongoDB 文档的唯一标识符。 + */ + @Id + private String id; + + /** + * 文件ID + */ + private String fileId; + + /** + * 状态 + * 场景1: 新建的情况下赋值为 0 + */ + @Deprecated + private Integer status; + + /** + * 选中状态 ? + * 场景1: 新建的情况下赋值为false + */ + @Deprecated + private Boolean isSelect; + + /** + * 当前文件,行的特征MD5值 + */ + private List lineFeatueMd5s; + + + /** + * 当前文件,代码块的MD5值 + */ + private List lineModels; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java new file mode 100644 index 0000000..cb279e7 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java @@ -0,0 +1,62 @@ +package com.keyware.composeanalysis.mongo; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; + + +import java.io.Serializable; +import java.util.List; + +/** + * @author liuzongren + * @ClassName MatchOpenFile + * @description: 匹配的开源文件信息 + * @datetime 2024年 07月 23日 18:05 + * @version: 1.0 + */ +@Data +@Accessors(chain =true) +public class MatchOpenFile implements Serializable { + + //ID + @Id + private String id; + + //开源项目版本名称 + private String version; + + //组件版本id + private String versionId; + + //开源项目名称 + private String pName; + + //开源项目id + private 
String pId; + + //与被测文件的特征相似度 + private Float featureSimilarity; + + //开源地址 + private String sourceUrl; + + //开源文件的详细路径 + private String sourceFilePath; + + //开源许可协议类型 + private List licenseType; + + //长度 + private Integer fileSize; + + //文件MD5值 + private String md5; + + //分析类型 (0文件,1函数 2:代码块 3:行) + private int analyzeType; + + //文件开源率 + private Float openRate; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java new file mode 100644 index 0000000..55b663e --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java @@ -0,0 +1,58 @@ +package com.keyware.composeanalysis.mongo; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.io.Serializable; +import java.util.List; + +/** + * @author liuzongren + * @ClassName MatchOpenFile + * @description: 文件开源率匹配信息 (成分分析表) + * @datetime 2024年 07月 23日 18:05 + * @version: 1.0 + */ +@Data +@Document(collection = "match_open_file") +@Accessors(chain =true) +public class MatchOpenFileMongoDto implements Serializable { + + @Id + private String id; + + /** + * 文件名称 + */ + private String fileName; + + /** + * 文件路径 + */ + @Deprecated + private String filePath; + + /** + * 当前 文件 是否 开源 false:不开源 true:开源 + */ + private Boolean openType; + + + /** + * 当前文件的开源率 + */ + private float openRate; + + /** + * 特征相似度 + */ + private Float featureSimilarity; + + /** + * 匹配的开源文件信息 + */ + List matchOpenFile; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenProjectMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenProjectMongoDto.java new file mode 100644 index 0000000..2104b19 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/MatchOpenProjectMongoDto.java @@ -0,0 +1,50 @@ +package 
com.keyware.composeanalysis.mongo; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.io.Serializable; +import java.util.List; + +/** + * @author liuzongren + * @ClassName MatchOpenProjectMongoDto + * @description: 匹配的开源项目信息 + * @datetime 2024年 07月 26日 18:05 + * @version: 1.0 + */ +@Data +@Document(collection = "match_open_project") +@Accessors(chain =true) +public class MatchOpenProjectMongoDto implements Serializable { + + @Id + private String id; + + //开源项目版本 + private String version; + + //开源项目id + private String projectId; + + //开源项目名称 + private String projectName; + + //开源项目文件数量 + private Integer projectFileNum; + + //匹配到的开源项目文件数量 + private Integer matchFileNum; + + //开源地址 + private String sourceUrl; + + //匹配到的开源项目文件md5值集合 + private List matchFilesMd5; + + //与开源项目相似度 matchFileNum / projectFileNum + private Double similarity; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java new file mode 100644 index 0000000..de8894c --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java @@ -0,0 +1,75 @@ +package com.keyware.composeanalysis.mongo; + +import lombok.Data; +import lombok.experimental.Accessors; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; + +import java.util.List; + + +/** + * 此类映射到名为 "project_assembly" 的 MongoDB 集合。 + *

+ * 当前项目匹配到的开源项目的信息 + * + * @author liuzongren + * @date 2024/7/9 + */ +@Document(collection = "project_assembly") +@Data +@Accessors(chain = true) +public class ProjectAssemblyMongoDto { + + /** + * MongoDB 文档的唯一标识符。 + */ + @Id + private String id; + + /** + * 项目的文件数量 + */ + private Integer fileCount; + + /** + * 匹配到的开源文件的数量 + */ + private Integer matchFileCount; + + /** + * 匹配到的开源项目版本Id + */ + private String versionId; + + /** + * 匹配到的开源项目版本名称 + */ + private String versionName; + + /** + * 匹配到的开源项目在开源网站的项目序号 + */ + private String projectId; + + /** + * 匹配到的开源项目的名称。 + */ + private String projectName; + + /** + * 开源项目的地址 + */ + private String openSourceUrl; + + /** + * 被测件和当前项目的整体相似度 + */ + private Double semblance; + + /** + * 当前开源项目的开源协议 + */ + private List licenseType; + +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java new file mode 100644 index 0000000..d20c0df --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java @@ -0,0 +1,75 @@ +package com.keyware.composeanalysis.mongo; + +import lombok.Data; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import java.io.Serializable; + +/** + *

+ * 项目基本信息 + *

+ * + * @author liuzongren + * @since 2024-07-23 + */ +@Data +@Document(collection = "PROJECTBASEDATA") +public class ProjectBaseDataMongoDto implements Serializable { + + private static final long serialVersionUID = 1L; + + @Id + private String id; + + /** + * 项目id + */ + @Field("ID") + private String projectId; + + /** + * 项目名称 + */ + @Field("NAME") + private String name; + + /** + * 项目类型 + */ + @Field("TYPE") + private String type; + + /** + * 当前开源项目被stars 的次数 + */ + @Field("STARS") + private String stars; + + /** + * 项目描述 + */ + @Field("DESCRIBE") + private String describe; + + /** + * 项目URL + */ + @Field("URL") + private String url; + + /** + * 许可类型 + */ + @Field("LICENSETYPE") + private String licenseType; + + /** + * 创建时间 + */ + @Field("CREATE_TIME") + private String createTime; + +} diff --git a/src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java b/src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java new file mode 100644 index 0000000..4a12050 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java @@ -0,0 +1,75 @@ +package com.keyware.composeanalysis.mongo; + +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.Field; + +import java.io.Serializable; + +/** + *

+ * 項目的版本信息 + *

+ * + * @author liuzongren + * @since 2024-07-23 + */ +@Data +@EqualsAndHashCode(callSuper = false) +@Document(collection = "VERSIONBASEDATA") +public class VersionbasedataMongoDto implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * 主键 + */ + @Id + private String id; + + /** + * 版本ID + */ + @Field("ID") + private String versionId; + + /** + * pid 项目ID + */ + @Field("PID") + private String projectId; + + /** + * 版本名称 + */ + @Field("NAME") + private String versionName; + + /** + * 版本下载地址 + */ + @Field("DOWNURL") + private String downloadUrl; + + /** + * 項目的相对路径 + */ + @Field("PATH") + private String path; + + /** + * 创建时间 + */ + @Field("CREATE_TIME") + private String createTime; + + /** + * 版本描述 + */ + @Field("DESCRIBE") + private String description; + + +} diff --git a/src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java b/src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java new file mode 100644 index 0000000..8489aa4 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java @@ -0,0 +1,68 @@ +//package com.keyware.composeanalysis.schedule; +// +//import cn.hutool.core.date.DateUnit; +//import cn.hutool.core.date.DateUtil; +//import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +//import com.keyware.common.constant.enums.AnalysisStatusEnum; +//import com.keyware.composeanalysis.constant.MongoDBConst; +//import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; +//import com.keyware.composeanalysis.entity.AnalysisTask; +//import com.keyware.composeanalysis.mongo.FileDataMongoDto; +//import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl; +//import com.keyware.composeanalysis.util.AnalysisLogUtil; +//import com.mongodb.client.MongoClient; +//import jakarta.annotation.Resource; +//import lombok.extern.log4j.Log4j2; +//import org.springframework.context.annotation.Configuration; +//import 
org.springframework.data.mongodb.core.MongoTemplate; +//import org.springframework.data.mongodb.core.query.Query; +//import org.springframework.scheduling.annotation.EnableScheduling; +//import org.springframework.scheduling.annotation.Scheduled; +// +//import java.util.List; +// +//import static org.springframework.data.mongodb.core.query.Criteria.where; +// +///** +// * 定时检测 分析任务是否完成 +// */ +//@Log4j2 +//@EnableScheduling +//@Configuration +//public class AnalysisStatusSchedule { +// +// @Resource +// private AnalysisTaskServiceImpl taskService; +// +// @Resource +// private MongoClient mongoClient; +// +// /** +// * 定时查询任务库 ,看是否存在已经分析完成的任务,如果存在 变更任务的状态 +// */ +// @Scheduled(cron = "*/1 * * * * ?") // 每五秒钟执行一次 +// public void startTask() { +// //查询正在进行成分分析的任务 +// LambdaQueryWrapper taskQueryWrapper = new LambdaQueryWrapper<>(); +// taskQueryWrapper.eq(AnalysisTask::getAnalysisStatus, AnalysisStatusEnum.ANALYSISING.getCode()); +// taskQueryWrapper.eq(AnalysisTask::getDecompressionFlag,true); +// taskQueryWrapper.eq(AnalysisTask::getComposeFlag,AnalysisStatusEnum.ANALYSISING.getCode()); +// List composeAnalysisTasks = taskService.list(taskQueryWrapper); +// +// //循环遍历任务状态 +// for (AnalysisTask composeTask : composeAnalysisTasks) { +// MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX+composeTask.getId()); +// +// Query fileQuery = new Query(where("isDirectory").is(false) +// .and("fileAnalysisStatus").in(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode(),FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())); +// Long finishedAnalysisFileCount = mongoTemplate.count(fileQuery, FileDataMongoDto.class); +// +// //所有文件分析完毕,将成分分析的状态 更改为已完成 +// if (finishedAnalysisFileCount.intValue() == composeTask.getFileCount()){ +// composeTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode()); +// taskService.updateById(composeTask); +// AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ 
DateUtil.between(composeTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒"); +// } +// } +// } +//} diff --git a/src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java b/src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java new file mode 100644 index 0000000..55d40b1 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java @@ -0,0 +1,56 @@ +package com.keyware.composeanalysis.service; + +import com.baomidou.mybatisplus.extension.service.IService; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.response.AnalysisResp; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; + +/** + *

+ * 服务类 + *

+ * + * @author liuzongren + * @since 2024-07-23 + */ +public interface AnalysisTaskService extends IService { + + /** + * 执行成分分析 + * + * @param analysisTask + */ + void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException; + + /** + * 停止或暂停分析任务 + * + * @param taskId 任务id + * @return AnalysisResp 成分分析任务响应 + * author liuzongren + */ + void stopComposeAnalysisTask(String taskId); + + /** + * 重新分析任务, + * + * @param taskId 任务id + * @return AnalysisResp 成分分析任务响应 + * author liuzongren + */ + Boolean restartComposeAnalysisTask(String taskId); + + + /** + * 恢复分析任务 + * + * @param analysisTask 任务 + * @return AnalysisResp 成分分析任务响应 + * author liuzongren + */ + void recoveryComposeAnalysisTask(AnalysisTask analysisTask); + + +} diff --git a/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java b/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java new file mode 100644 index 0000000..6528706 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java @@ -0,0 +1,215 @@ +package com.keyware.composeanalysis.service.impl; + +import cn.hutool.core.date.DateUnit; +import cn.hutool.core.date.DateUtil; +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import com.keyware.common.constant.RedisConst; +import com.keyware.common.constant.enums.AnalysisStatusEnum; +import com.keyware.composeanalysis.constant.MongoDBConst; +import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.mapper.AnalyzeTaskMapper; +import com.keyware.composeanalysis.mongo.FileDataMongoDto; +import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; +import com.keyware.composeanalysis.mongo.ProjectAssemblyMongoDto; +import com.keyware.composeanalysis.service.AnalysisTaskService; +import com.keyware.composeanalysis.task.*; +import 
com.keyware.composeanalysis.util.AnalysisLogUtil; +import com.keyware.composeanalysis.util.RedisUtil; +import com.keyware.composeanalysis.util.SolrUtils; +import com.mongodb.client.MongoClient; +import jakarta.annotation.Resource; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.collections.CollectionUtils; +import org.springframework.core.task.TaskExecutor; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.mongodb.core.query.Update; +import org.springframework.scheduling.annotation.Async; +import org.springframework.stereotype.Service; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +import static org.springframework.data.mongodb.core.query.Criteria.where; + + +/** + *

+ * Compose (component) analysis service implementation + *

+ * + * @author liuzongren + * @since 2024-07-23 + */ +@Log4j2 +@Service +public class AnalysisTaskServiceImpl extends ServiceImpl implements AnalysisTaskService { + + @Resource + private MongoClient mongoClient; + + @Resource + private SolrUtils solrUtils; + + @Resource + private TaskExecutor taskExecutor; + + @Resource + private RedisUtil redisUtil; + + @Override + @Async + public void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException { + long startTime = System.currentTimeMillis(); + log.info("开始成份分析,taskName:{}",analysisTask.getFileName()); + //校验文件压缩是否完成 + retryGetDecompressionFlag(analysisTask); + + //开始分析前,将成分分析的状态为 进行中 + analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSISING.getCode()); + this.updateById(analysisTask); + MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); + AnalysisLogUtil.insert(mongoTemplate, "【成分分析】开始:" + analysisTask.getFileName()); + + //首先进行项目级别的分析,将所有文件的源MD5批量去solr库中匹配 + PorjectAnalysisTask projectAnalysisTask = new PorjectAnalysisTask(mongoClient, analysisTask, solrUtils, this); + projectAnalysisTask.doAnalysis(); + + //项目级的分析完成后,没有匹配中的文件,根据分析的级别,对每个文件进行相应级别的分析 + analysisFile(mongoTemplate,analysisTask); + + //成份分析完成后,查询所有开源文件,判断当前项目是否开源 + checkProjectIfOpen(mongoTemplate,analysisTask); + + //修改成分分析状态为完成 + analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode()); + this.updateById(analysisTask); + + //插入分析日志 + AnalysisLogUtil.insert(mongoTemplate,"【成分分析】已完成,耗时:"+ DateUtil.between(analysisTask.getAnalysisStartTime(),DateUtil.date(), DateUnit.SECOND) +"秒"); + log.info("成份分析完成,taskName:{},耗时:{}",analysisTask.getFileName(),(System.currentTimeMillis()-startTime)/1000 +"秒"); + } + + @Override + public void stopComposeAnalysisTask(String taskId) { + //将成分分析的任务状态的标志位置为暂停,让线程池中的排队的任务队列停止分析 + redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, taskId), AnalysisStatusEnum.ANALYSIS_PAUSED.getCode()); + } + + @Override + 
public Boolean restartComposeAnalysisTask(String taskId) { + boolean result = false; + try { + //删除匹配的开源项目信息 + MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + taskId); + mongoTemplate.remove(ProjectAssemblyMongoDto.class); + + //删除项目匹配的开源文件 + mongoTemplate.remove(MatchOpenFileMongoDto.class); + + //将文件分析状态设置为未开始分析 + mongoTemplate.update(FileDataMongoDto.class) + .apply(new Update().set("openType", false) + .set("fileAnalysisStatus", FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode())) + .all(); + + //重新开始分析任务 + doComposeAnalyze(getById(taskId)); + result = true; + } catch (Exception e) { + log.error("重新分析失败", e); + } + return result; + } + + @Override + @Async + public void recoveryComposeAnalysisTask(AnalysisTask analysisTask) { + /** + * todo 这里存在一个逻辑缺陷 + * 项目级别的分析是无法终止的,当前任务恢复恢复的是文件级的成分分析,如果文件级的没有分析完成,这里可能会将所有文件进行文件级别的分析 + */ + try { + //将成分分析的任务状态的标志位置改为进行中 + redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()), AnalysisStatusEnum.ANALYSISING.getCode()); + + MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); + //项目级的分析完成后 + Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").ne(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode()) + .and("isDirectory").is(false)); + List unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class); + + if (CollectionUtils.isNotEmpty(unAnalyzedFiles)){ + //使用线程池 并行的分析文件 + CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size()); + unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> { + IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch); + taskExecutor.execute(task); + }); + countDownLatch.await(); + //修改成分分析状态为完成 + analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode()); + this.updateById(analysisTask); + 
AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ DateUtil.between(analysisTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒"); + } + } catch (Exception e) { + log.error("恢复分析失败", e); + } + } + + + //引入解压缩有可能会很慢,这里添加重试机制,最多重试6次,60s + private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) { + int retryCount = 0; + while (retryCount < 60) { + AnalysisTask latestAnalysisTask = this.getById(analysisTask.getId()); + if (latestAnalysisTask.getDecompressionFlag()) { + analysisTask.setDecompressionFlag(true); + analysisTask.setFileCount(latestAnalysisTask.getFileCount()); + return true; + } + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + log.error("线程休眠异常", e); + } + retryCount++; + } + return false; + } + + + //开启单个文件的分析 + private void analysisFile(MongoTemplate mongoTemplate,AnalysisTask analysisTask) throws InterruptedException { + Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").is(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode()) + .and("isDirectory").is(false)); + List unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class); + + //使用线程池 并行的分析 + CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size()); + unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> { + IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch); + taskExecutor.execute(task); + }); + countDownLatch.await(); + } + + //校验当前项目是否开源 + private void checkProjectIfOpen(MongoTemplate mongoTemplate,AnalysisTask analysisTask){ + Query openFileQuery = new Query(where("openType").is(true)); + Long openFilesCount = mongoTemplate.count(openFileQuery, FileDataMongoDto.class); + //是否开源阈值 + Integer openThread = analysisTask.getOpenRateThreshold(); + BigDecimal totalFileCount = new BigDecimal(analysisTask.getFileCount()); + //统计开源率 + BigDecimal openRate = new BigDecimal(openFilesCount).divide(totalFileCount, 4, 
RoundingMode.HALF_UP).multiply(new BigDecimal(100)); + //超过阈值认为开源 + if (openRate.compareTo(new BigDecimal(openThread)) >= 0) { + analysisTask.setOpenType(true); + } + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/solr/VersionTree.java b/src/main/java/com/keyware/composeanalysis/solr/VersionTree.java new file mode 100644 index 0000000..669292a --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/solr/VersionTree.java @@ -0,0 +1,56 @@ +package com.keyware.composeanalysis.solr; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.util.Arrays; +import java.util.List; + +/** + * @author liuzongren + * @date 2024/7/26 + * @Description solr 库 VersionTree树信息 ,保存项目的整体信息 + */ +@Data +@Accessors(chain = true) +public class VersionTree { + + /** + * 项目ID + */ + private String proId; + + /** + * 项目名称 + */ + private String proName; + + /** + * 项目编号 + */ + private String versionId; + + /** + * 项目版本 + */ + private String versionName; + + /** + * 项目地址 + */ + private String downUrl; + + public void setLicenseType(String licenseType) { + if (licenseType != null){ + this.licenseType = Arrays.asList(licenseType.split("@@@")); + } + } + + private List licenseType; + + /** + * 项目所有文件的信息 + */ + private List dirTree; + +} diff --git a/src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java b/src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java new file mode 100644 index 0000000..506e637 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java @@ -0,0 +1,42 @@ +package com.keyware.composeanalysis.solr; + +import lombok.Data; + +/** + * @author liuzongren + * @date 2024/7/26 + */ +@Data +public class VersionTreeNode { + + /** + * 节点编号 + */ + private String id; + + /** + * 父节点编号 + */ + private String pid; + + /** + * 文件名称 + */ + private String name; + + /** + * 是否是父节点 + */ + private Boolean isParent; + + /** + * 文件的MD5 + */ + private String sourceFileMd5; + + /** + * 文件在项目中的相对路径 + */ + 
private String fullPath; + +} diff --git a/src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java b/src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java new file mode 100644 index 0000000..de10f4a --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java @@ -0,0 +1,45 @@ +package com.keyware.composeanalysis.task; + +import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.mongo.FileDataMongoDto; +import org.springframework.data.mongodb.core.MongoTemplate; + +import java.util.concurrent.CountDownLatch; + +/** + * @author liuzongren + * @date 2024/7/31 + * @description + */ +public class AnalysisTaskFactory { + + /** + * 根据分析类型,创建具体的分析任务 + * @param analysisTask + * @param analysisFile + * @param mongoTemplate + * @param countDownLatch 任务总数控制器 + * @return + */ + + public static IAnalysisTask createAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) { + AnalysisLevelEnum analysisLevel = AnalysisLevelEnum.getAnalysisLevelEnum(analysisTask.getAnalysisLevel()); + switch (analysisLevel) { + case FILE_LEVEL: + return new FileAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); + case FUNCTION_LEVEL: + return new FunctionAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); + case BLOCK_LEVEL: + return new CodeBlockAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); + case LINE_LEVEL: + return new LineAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); + default: + break; + } + return null; + } + + + +} diff --git a/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java new file mode 100644 index 0000000..5a20742 --- /dev/null +++ 
b/src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java @@ -0,0 +1,356 @@ +package com.keyware.composeanalysis.task; + + +import cn.hutool.core.collection.CollectionUtil; +import cn.hutool.core.lang.Pair; +import com.alibaba.fastjson.JSONArray; +import com.keyware.composeanalysis.constant.FixedValue; +import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst; +import com.keyware.composeanalysis.constant.RedisConst; +import com.keyware.composeanalysis.constant.SolrDBConst; +import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; +import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; +import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.mongo.FileDataMongoDto; +import com.keyware.composeanalysis.mongo.LineDataMongoDto; +import com.keyware.composeanalysis.mongo.MatchOpenFile; +import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; +import com.keyware.composeanalysis.solr.VersionTree; +import com.keyware.composeanalysis.util.*; +import com.keyware.keyswan.anaysis.Analysis; +import com.keyware.keyswan.anaysis.AnalysisFactory; +import com.keyware.keyswan.common.CodeFile; +import com.keyware.keyswan.common.LineModel; +import com.keyware.utils.IdGenerator; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.lang3.StringUtils; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Update; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.keyware.composeanalysis.util.SimilarityUtil.getOpenRateAndSaveRowNum; +import static 
org.springframework.data.mongodb.core.query.Criteria.where; + +/** + * @author liuzongren + * @ClassName LineAnalysisTask + * @description: 代码块级别溯源 任务 + * @datetime 2024年 07月 25日 16:19 + * @version: 1.0 + */ + +@Log4j2 +public class CodeBlockAnalysisTask extends IAnalysisTask { + + private MongoTemplate mongoTemplate; + private AnalysisTask analysisTask; + //被测件的文件信息 + private FileDataMongoDto analysisFile; + + private SolrUtils solrUtils; + + private RedisUtil redisUtil; + + private CountDownLatch countDownLatch; + + public CodeBlockAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) { + this.mongoTemplate = mongoTemplate; + this.analysisTask = analysisTask; + this.analysisFile = analysisFile; + this.countDownLatch = countDownLatch; + this.solrUtils = SpringContextUtils.getBean(SolrUtils.class); + this.redisUtil = SpringContextUtils.getBean(RedisUtil.class); + } + + /** + * 方法 或者代码块 级别 源代码溯源 + * 当前任务 需要在 文件级分析完成后 进行 + */ + + @Override + public void run() { + //执行任务前,判断一下任务执行的状态 + Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId())); + if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) { + log.info("任务已取消,fileName:{}", analysisFile.getName()); + countDownLatch.countDown(); + return; + } + + //获取文件地址 + String filePath = analysisFile.getFileUrl(); + //获取文件名称 + String fileName = analysisFile.getName(); + + try { + LineDataMongoDto lineDataMongoDto = new LineDataMongoDto(); + lineDataMongoDto.setFileId(analysisFile.getId()); + Analysis analysis = AnalysisFactory.getAnalysis(filePath); + //将代码块特征存入MongoDB + //提取文件的代码块信息 + CodeFile codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT_BY_6_LINE); + List lineFeatures = codeFile.getLine_hay(); + + 
//根据文件后缀判断需要查询的solr特征库库名称 + String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix()); + + //从solr库中获取特征相似的文件 + SolrDocumentList matchOpenSourceFiles = getFeatureSimilarityFromSolr(featureCoreName, lineFeatures); + + //计算开源率 + doAnalysis(matchOpenSourceFiles, codeFile); + + //更新文件表的分析状态为3 行级特征以分析完毕 + analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .replaceWith(analysisFile) + .findAndReplace(); + + AnalysisLogUtil.insert(mongoTemplate, "【代码块级分析】完成" + fileName); + log.info("文件" + fileName + ":代码块级分析完成"); + } catch (Exception e) { + AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【代码块分析】失败" + fileName, e); + log.error("文件:" + fileName + "代码块级分析失败!", e); + //修改当前文件分析状态未失败 + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode())) + .first(); + } finally { + countDownLatch.countDown(); + } + } + + + /** + * 根据 特征值 从特征库中检索 具有特征相似的 + * + * @param solrCoreName 检索的solr 库名称 + * @param functionAndCodeBlockInfos + * @return + */ + private SolrDocumentList getFeatureSimilarityFromSolr(String solrCoreName, List functionAndCodeBlockInfos) { + //获取函数获取代码块的特征MD5值 + Set traitLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getTraitLineMd5).collect(Collectors.toSet()); + Set cuttLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getCutLineMd5).collect(Collectors.toSet()); + Set queryMd5Arr = Stream.concat(traitLineMd5Arr.stream(), cuttLineMd5Arr.stream()).collect(Collectors.toSet()); + String queryStr = "line_hay:(" + StringUtils.join(queryMd5Arr, " OR ") + ")"; + log.info("查询条件: solrCoreName:{},queryStr:{}", solrCoreName, queryStr); + SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,line_hay"); + log.info("查询结果: result:{}", result); + 
return result; + } + + + /** + * 计算开源率 被测件的开源率 + * + * @param matcheOpenSourceFiles 匹配的开源文件信息 + * @param fileAnalysisRes 被测件的解析结果 + */ + private void doAnalysis(SolrDocumentList matcheOpenSourceFiles, CodeFile fileAnalysisRes) { + + if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) { + return; + } + + //根据文件后缀判断需要查询的文件版本库名称 + String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); + + + //保存所有匹配的特征代码块MD5信息,方便统计总的匹配行数 + Set matchingTraitLineSet = new HashSet<>(); + + //匹配的特征代码块MD5 + Set matchedLineRowsNum = new HashSet<>(); + + //统计每个文件的开源率 + List matchOpenFilesRes = calculateSimilarityAndOpenRate(matcheOpenSourceFiles, fileAnalysisRes, sourceFileBaseCoreName, matchedLineRowsNum, matchingTraitLineSet); + + //计算文件的总体的特征相似度 + Map traitMd5Map = fileAnalysisRes.getLine_hay().stream().collect(Collectors.toMap(LineModel::getTraitLineMd5, java.util.function.Function.identity())); + + int matchCodeBlockLineCount = 0; + for (String matchFeatureFunctionMd5 : matchingTraitLineSet) { + LineModel lineModel = traitMd5Map.get(matchFeatureFunctionMd5); + matchCodeBlockLineCount += (Integer.valueOf(lineModel.getEndLine()) - Integer.valueOf(lineModel.getStartLine())); + } + + BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + //计算文件的总体开源率 + BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + //获取开源率的阈值 + Integer openRateThreshold = analysisTask.getOpenRateThreshold(); + + //如果开源率大于阈值,则将当前文件设置成开源 + if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) { + analysisFile.setOpenType(true); + } + + //保存当前文件的开源信息到mongo库中 + MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); + matchOpenFileMongo.setId(IdGenerator.uuid32()) + 
.setFilePath(analysisFile.getFileUrl()) + .setFileName(analysisFile.getName()) + .setFeatureSimilarity(featureSimilarity.floatValue()) + .setOpenRate(openRate.floatValue()) + .setOpenType(analysisFile.getOpenType()) + .setMatchOpenFile(matchOpenFilesRes); + + mongoTemplate.save(matchOpenFileMongo); + } + + + /** + * 计算当前文件的特征相似度 和 开源率 + * + * @param matchOpenFiles 通过MD5 匹配到的所有开源文件 + * @param sourceFileBaseCoreName 当前文件特征文件的 solr coreName + * @param matchLineRowsNum 所有开源文件匹配到的开源行号列表 + * @param matchFeatureCodeBlockMd5s 所有开源文件匹配到的特征代码块MD5 + */ + private List calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set matchLineRowsNum, Set matchFeatureCodeBlockMd5s) { + + List matchOpenFilesRes = new ArrayList<>(); + + //首先根据文件的MD5查询开源文件的版本ID,和路径信息 + Set openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet()); + Map md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s); + + //根据版本ID查询版本的详细信息 + //todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versioinTree 后面需要优化 + Set openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet()); + List versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds); + Map versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity())); + + + for (SolrDocument matchFile : matchOpenFiles) { + + //开源文件md5 + String openSourceFileMd5 = matchFile.getFieldValue("sourceMd5").toString(); + + //解析文件的代码块特征值 + List openFileCodeBlockFeatureList = getOpenFileCodeBlockList(matchFile); + + //匹配的总特征行数 + int currentFileMatchFeatureLineCount = 0; + + //遍历当前文件的代码块特征,统计匹配的总行数 + for (LineModel lineModel : fileAnalysisRes.getLine_hay()) { + String traitLineMd5 = lineModel.getTraitLineMd5(); + //村换匹配到的文件的行信息 
+ for (LineModel matchLine : openFileCodeBlockFeatureList) { + if (traitLineMd5.equals(matchLine.getTraitLineMd5())) { + //计算匹配的特征行数 + currentFileMatchFeatureLineCount += (Integer.valueOf(matchLine.getEndLine()) - Integer.valueOf(matchLine.getStartLine()) + 1); + matchFeatureCodeBlockMd5s.add(traitLineMd5); + } + } + } + + + //根据源文件的MD5确定需要查询源码库的序号 + String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; + + //获取开源文件的文本信息 + SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent"); + + //当前文件的开源率 + Pair> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent.getFieldValue("sourceContent").toString()); + + //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率 + matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); + + //统计当前文件的特征相似度 + BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(fileAnalysisRes.getCodeRowNum(), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); + VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); + + //组装当前开源文件的开源项目信息 + MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); + matchOpenFileInfo.setPId(versionInfo.getProId()) + .setPName(versionInfo.getProName()) + .setSourceUrl((String) openEntries.get("fullPath")) + .setFeatureSimilarity(featureSimilarity.floatValue()) + .setOpenRate(openRateAndSaveRowNum.getKey()) + .setVersion(versionInfo.getVersionName()) + .setLicenseType(versionInfo.getLicenseType()) + .setAnalyzeType(AnalysisLevelEnum.BLOCK_LEVEL.getCode()); + matchOpenFilesRes.add(matchOpenFileInfo); + } + return matchOpenFilesRes; + } + + + /** + * 获取当前文件的代码块特征值 + * + * @param openSourceFile + * @return + */ + private List getOpenFileCodeBlockList(SolrDocument openSourceFile) { + //解析文件的代码块特征值 + String 
lineFeatureMd5s = (String) openSourceFile.get("line_hay"); + lineFeatureMd5s = lineFeatureMd5s.replace("\\", "") + .replace("\"{", "{") + .replace("}\"", "}"); + return JSONArray.parseArray(lineFeatureMd5s, LineModel.class); + } + + + /** + * 将特征值插入到mongo库中 + * + * @param features 特征集合 + * @param lineDataMongoDto 当前分析任务 ,特征信息存储 + * todo 后期 看看有没有插入的必要 + * @param + */ + @Deprecated + private void insertFeatureValue(List features, LineDataMongoDto lineDataMongoDto) { + List batchInsertList = new ArrayList<>(); + if (CollectionUtil.isNotEmpty(features)) { + //这里的批量插入逻辑可以进行校验 + //每10条存一次,解析的数据量如果过大,可能会超过MongoDB数据限制 + int batchInsertStpe = 10; + int total = 0; + for (int i = 0; i < features.size(); i++) { + LineModel lineModel = features.get(i); + if (total != batchInsertStpe) { + batchInsertList.add(lineModel); + total++; + } + if (i == features.size() - 1 && total != batchInsertStpe) { + total = 0; + lineDataMongoDto.setId(IdGenerator.uuid32()) + .setLineModels(batchInsertList); + mongoTemplate.insert(lineDataMongoDto); + } + if (total == batchInsertStpe) { + total = 0; + lineDataMongoDto.setId(IdGenerator.uuid32()) + .setLineModels(batchInsertList); + mongoTemplate.insert(lineDataMongoDto); + batchInsertList.clear(); + } + } + } else { + lineDataMongoDto.setId(IdGenerator.uuid32()); + mongoTemplate.insert(lineDataMongoDto); + } + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java new file mode 100644 index 0000000..3a0aa4c --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java @@ -0,0 +1,232 @@ +package com.keyware.composeanalysis.task; + +import com.keyware.composeanalysis.constant.FixedValue; +import com.keyware.composeanalysis.constant.RedisConst; +import com.keyware.composeanalysis.constant.SolrDBConst; +import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; +import 
com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; +import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.mongo.FileDataMongoDto; +import com.keyware.composeanalysis.mongo.MatchOpenFile; +import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; +import com.keyware.composeanalysis.solr.VersionTree; +import com.keyware.composeanalysis.util.*; +import com.keyware.keyswan.anaysis.Analysis; +import com.keyware.keyswan.anaysis.AnalysisFactory; +import com.keyware.keyswan.common.CodeFile; +import com.keyware.utils.IdGenerator; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Update; + +import java.io.IOException; +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.springframework.data.mongodb.core.query.Criteria.where; + +/** + * @author liuzongren + * @date 2024/7/23 + * desc 文件级溯源分析任务 + */ +@Log4j2 +public class FileAnalysisTask extends IAnalysisTask { + + private MongoTemplate mongoTemplate; + private AnalysisTask analysisTask; + private SolrUtils solrUtils; + //文件信息 + private FileDataMongoDto analysisFile; + private RedisUtil redisUtil; + private CountDownLatch countDownLatch; + + + public FileAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) { + this.mongoTemplate = mongoTemplate; + this.analysisTask = analysisTask; + this.analysisFile = 
analysisFile; + this.countDownLatch = countDownLatch; + this.solrUtils = SpringContextUtils.getBean(SolrUtils.class); + this.redisUtil = SpringContextUtils.getBean(RedisUtil.class); + } + + + /** + * 文件级溯源分析 + * 当前级别溯源分析 需要在 项目级级分析完成后执行 + * 当前文件源MD5 已经在solr库中匹配不到了,需要提取特征去匹配 + */ + @Override + public void run() { + //执行任务前,判断一下任务执行的状态 + Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId())); + if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) { + log.info("任务已取消,fileName:{}", analysisFile.getName()); + countDownLatch.countDown(); + return; + } + //获取当前文件名称 + String fileName = analysisFile.getName(); + + AnalysisLogUtil.insert(mongoTemplate, "【文件级分析】正在分析" + fileName); + try { + //只有主流语言的才能解析 + //非32种主流语言的不能提取文件特征,在文件级MD5匹配的时候,已经做过匹配 + if (StringUtils.isNotEmpty(analysisFile.getSuffix()) && FixedValue.SUFFIX_SOLR_VERSION.containsKey(analysisFile.getSuffix())) { + //根据文件后缀 查询 *_CutFileInfo库名称 + String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix()); + //根据文件名称,获取文件解析器 + Analysis analysis = AnalysisFactory.getAnalysis(fileName); + //如果 analysis 返回值为null 说明还未支持这种语言的特征提取 可以直接通过文件的MD5值去solr库中匹配 + if (analysis != null) { + //如果文件大小超过3M,则不进行文件级行级特征提取 + Integer fileSize = analysisFile.getFileSize(); + if (fileSize < (3 * 1024 * 1024)) { + CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0"); + //根据文件的特征值,去相应文件文件后缀的特征库中进行查询 + if (codeFile != null) { + String querySb = "sourceMd5:" + codeFile.getSourceMd5() + " OR cutFileMd5:" + codeFile.getCutFileMd5() + " OR traitFileMd5:" + codeFile.getTraitFileMd5(); + SolrDocumentList openSourceFileList = solrUtils.query(featureCoreName, querySb, "sourceMd5"); + //如果当前文件在源码库中,匹配到了数据,则统计当前文件的开源率 + if (CollectionUtils.isNotEmpty(openSourceFileList)) { + ananlyzeFileOpenRate(openSourceFileList); + } + } + } + } + } + //更新文件级分析结果 + 
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .replaceWith(analysisFile) + .findAndReplace(); + } catch (Exception e) { + AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【文件级】提取失败" + fileName, e); + log.error("文件:" + fileName + "文件级别特征提取失败!", e); + //将当前文件的分析状态变更为失败 + analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()); + //更新文件级分析结果 + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode())) + .first(); + } finally { + countDownLatch.countDown(); + } + } + + + /** + * 分析文件的开源率 + * + * @param fileList 匹配的开源文件信息 + * @throws IOException + */ + private void ananlyzeFileOpenRate(SolrDocumentList fileList) throws IOException { + //创建匹配开源文件信息匹配对象 + MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto(); + matchOpenFileInfo.setId(IdGenerator.uuid32()) + .setFileName(analysisFile.getName()) + .setFilePath(analysisFile.getFileUrl()); + + //根据匹配的开源文件的md5 获取版本ID + Set sourceFileMd5 = fileList.stream().map(solrDocument -> (String) solrDocument.get("sourceMd5")).collect(Collectors.toSet()); + String sourceCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); + Map md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceCoreName, sourceFileMd5); + + //根据版本ID获取版本信息 + Set versionIds = md5VersionObjMap.values().stream().map(solrDocument -> (String) solrDocument.get("versionId")).collect(Collectors.toSet()); + List treeInfoList = solrUtils.queryBatchVersionInfoByVersionIds(versionIds); + Map versionIdMap = treeInfoList.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity())); + + //获取被测件文本内容 + String fileContent = new String(Files.readAllBytes(Paths.get(analysisFile.getFileUrl())), 
"utf-8").replaceAll(" ", ""); + + //将被测件的文本内容拆分成行信息,用于匹配开源信息 + List fileLines = SimilarityUtil.getSplitWords(fileContent); + + HashSet openLineNum = new HashSet<>(); + + //开源文件结果集合 + ArrayList matchOpenFileList = new ArrayList<>(); + //遍历匹配到的开源文件列表 + for (int i = 0; i < fileList.size(); i++) { + String openFileMd5 = (String) fileList.get(i).get("sourceMd5"); + SolrDocument versionObj = md5VersionObjMap.get(openFileMd5); + String versionId = (String) versionObj.get("versionId"); + VersionTree versionInfo = versionIdMap.get(versionId); + if (versionInfo == null) { + log.error("未在versionTree中找到版本信息,openFileMd5:{},versionId:{}",openFileMd5, versionId); + continue; + } + MatchOpenFile matchOpenFile = new MatchOpenFile(); + matchOpenFile.setId(IdGenerator.uuid32()) + .setVersionId(versionId) + .setSourceFilePath((String) versionObj.get("fullPath")) + .setSourceUrl(versionInfo.getDownUrl()) + .setPId(versionInfo.getProId()) + .setPName(versionInfo.getProName()) + .setLicenseType(versionInfo.getLicenseType()) + .setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()) + .setVersion(versionInfo.getVersionName()) + .setFeatureSimilarity(100.00f); + //计算被测件和开源文件的文本相似度 + //根据文件的MD5的第一位获取solr库索引名称 + String solrNameIndex =openFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; + SolrDocumentList sourceFileInfo = solrUtils.query(solrNameIndex, "sourceFileMd5:" + openFileMd5, "sourceContent"); + if (CollectionUtils.isNotEmpty(sourceFileInfo)) { + String openSourceContent = String.valueOf(sourceFileInfo.get(0).getFieldValue("sourceContent")); + //这里存在优化空间,被测件的文件行拆分 可以拿到循环外面 + double similarity = SimilarityUtil.getSimilarityAndSaveRowNum(fileLines, openSourceContent, openLineNum); + matchOpenFile.setOpenRate(new BigDecimal(similarity * 100).setScale(2, RoundingMode.HALF_UP).floatValue()); + //如果找不到源代码,直接将原文开源率置为 100% + } else { + log.error("找不到源代码,DBname:{},sourceFileMd5:{}", solrNameIndex, openFileMd5); + matchOpenFile.setOpenRate(100.00f); + } + 
matchOpenFile.setMd5(openFileMd5); + matchOpenFileList.add(matchOpenFile); + } + //统计被测件的总体开源率 + //获取开源率阈值,判断当前文件是否开源 + Integer openRateThreshold = analysisTask.getOpenRateThreshold(); + int openLineCount = openLineNum.size(); + BigDecimal totalLineCount = new BigDecimal(fileLines.size()); + BigDecimal openRate = new BigDecimal(openLineCount).divide(totalLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); + + //超过阈值,则认为当前文件是开源文件 + if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) { + analysisFile.setOpenType(true); + } else { + analysisFile.setOpenType(false); + } + + //修改保存测试文件信息 + analysisFile.setOpenLineCount(openLineCount) + .setOpenRate(openRate.floatValue()); + + //组装开源信息 + matchOpenFileInfo.setFilePath(analysisFile.getFileUrl()) + .setOpenType(analysisFile.getOpenType()) + .setOpenRate(analysisFile.getOpenType() ? 100.00f : 0.00f) + .setMatchOpenFile(matchOpenFileList); + + //保存当前开源信息数据 + mongoTemplate.insert(matchOpenFileInfo); + + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java new file mode 100644 index 0000000..100c1d8 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java @@ -0,0 +1,409 @@ +package com.keyware.composeanalysis.task; + + +import cn.hutool.core.collection.CollectionUtil; +import cn.hutool.core.lang.Pair; +import cn.hutool.core.util.ObjUtil; +import com.alibaba.fastjson.JSONArray; +import com.keyware.composeanalysis.constant.FixedValue; +import com.keyware.composeanalysis.constant.RedisConst; +import com.keyware.composeanalysis.constant.SolrDBConst; +import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; +import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; +import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import 
com.keyware.composeanalysis.mongo.FileDataMongoDto; +import com.keyware.composeanalysis.mongo.LineDataMongoDto; +import com.keyware.composeanalysis.mongo.MatchOpenFile; +import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; +import com.keyware.composeanalysis.solr.VersionTree; +import com.keyware.composeanalysis.util.*; +import com.keyware.keyswan.common.LineModel; +import com.keyware.keyware.anaysis.Analysis; +import com.keyware.keyware.anaysis.AnalysisFactory; +import com.keyware.keyware.common.CodeFile; +import com.keyware.keyware.common.Function; +import com.keyware.utils.IdGenerator; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.lang3.StringUtils; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Update; + +import java.io.FileInputStream; +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.springframework.data.mongodb.core.query.Criteria.where; + +/** + * @author liuzongren + * @ClassName LineAnalysisTask + * @description: 函数级别溯源 任务 + * @datetime 2024年 07月 25日 16:19 + * @version: 1.0 + */ + +@Log4j2 +public class FunctionAnalysisTask extends IAnalysisTask { + + private MongoTemplate mongoTemplate; + private AnalysisTask analysisTask; + //被测件的文件信息 + private FileDataMongoDto analysisFile; + + private SolrUtils solrUtils; + + private RedisUtil redisUtil; + + private CountDownLatch countDownLatch; + + + public FunctionAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) { + this.mongoTemplate = mongoTemplate; + this.analysisTask = analysisTask; + this.analysisFile = analysisFile; + this.countDownLatch = countDownLatch; + this.solrUtils = 
SpringContextUtils.getBean(SolrUtils.class); + this.redisUtil = SpringContextUtils.getBean(RedisUtil.class); + } + + /** + * 方法 或者代码块 级别 源代码溯源 + * 当前任务 需要在 文件级分析完成后 进行 + */ + + @Override + public void run() { + //执行任务前,判断一下任务执行的状态 + Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId())); + if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) { + log.info("任务已取消,fileName:{}", analysisFile.getName()); + countDownLatch.countDown(); + return; + } + //获取文件地址 + String filePath = analysisFile.getFileUrl(); + //获取文件名称 + String fileName = analysisFile.getName(); + + try { + + //根据文件后缀判断需要查询的solr特征库库名称 + String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix()); + + //根据文件后缀,去检索sourceFileBase库,来获取文件版本信息 + String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); + + //根据文件的名称获取函数解析器 + Analysis analysis = AnalysisFactory.getAnalysis(filePath); + //解析文件 + if (!ObjUtil.hasEmpty(featureCoreName, sourceFileBaseCoreName, analysis)) { + CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath)); + if (codeFile != null) { + List functionList = codeFile.getFunctionList(); + if (CollectionUtil.isNotEmpty(functionList)) { + //获取函数的特征MD5,cutMD5 + List featureFunctionMd5List = functionList.stream().map(Function::getMd5).collect(Collectors.toList()); + List cutFunctionMd5List = functionList.stream().map(Function::getSourceMd5).collect(Collectors.toList()); + Set queryMd5List = Stream.concat(featureFunctionMd5List.stream(), cutFunctionMd5List.stream()).collect(Collectors.toSet()); + String queryStr = "fun_hay:(" + StringUtils.join(queryMd5List, " OR ") + ")"; +// log.info("检索函数特征,coreName:{} ,queryStr:{}", featureCoreName, queryStr); + SolrDocumentList matchOpenFiles = solrUtils.query(featureCoreName, queryStr, "sourceMd5,fun_hay"); +// log.info("resp", sourceMd5); + 
//如果函数级特征匹配,能够匹配到开源文件信息,则根据开源文件的md5或者开源文件信息,做相似度对比 + if (matchOpenFiles != null) { + //对匹配到的文件进行分析 + doAnalysis(matchOpenFiles, sourceFileBaseCoreName, codeFile); + } else { + //因为函数的特征库较少,这里补充一个对比逻辑,如果当前文件解析失败,或者没有通过函数匹配到数据,则直接通过文件的md5 再次查询一次solr库 + checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5()); + } + } + } + } else { + //因为函数的特征库较少,这里补充一个对比逻辑,如果当前文件解析失败,或者没有通过函数匹配到数据,则直接通过文件的md5 再次查询一次solr库 + checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5()); + } + + //更新文件表的分析状态为3 函数级特征以分析完毕 + analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .replaceWith(analysisFile) + .findAndReplace(); + + AnalysisLogUtil.insert(mongoTemplate, "【函数级分析】完成" + fileName); + log.info("文件" + fileName + ":函数级分析完成"); + } catch (Exception e) { + AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e); + log.error("文件:" + fileName + "函数级别特征提取失败!", e); + //修改当前文件分析状态未失败 + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode())) + .first(); + } finally { + countDownLatch.countDown(); + } + } + + + /** + * 对比函数级文本相似度 + * + * @param matchOpenFiles 通过特征匹配到的开源文件的md5 + * @param sourceFileBaseCoreName 查询版开源文件版本ID的 solr库名称 + * @param fileAnalysisRes 被测件的函数解析结果 + * @throws Exception + */ + private void doAnalysis(SolrDocumentList matchOpenFiles, String sourceFileBaseCoreName, CodeFile fileAnalysisRes) throws Exception { + + //按照函数的特征md5进行分组,getter ,setter等方法的 特征值会重复 + Map> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5)); + + //函数代码总函数 + int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum(); + + //匹配到的特征函数Md5 + Set matchFeatureFunctionMd5s = new HashSet(); + + 
//匹配到源码的行号 + Set matchOpenLineRowsNum = new HashSet(); + + //计算与每个开源文件的开源率和特征相似度 + List matchOpenFilesRes = calculateSimilarityAndOpenRate(matchOpenFiles, fileAnalysisRes, sourceFileBaseCoreName, matchOpenLineRowsNum, matchFeatureFunctionMd5s); + + //计算文件的总体的特征相似度 + int matchFunctionLineCount = 0; + for (String matchFeatureFunctionMd5 : matchFeatureFunctionMd5s) { + matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum(); + } + + BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + //计算文件的总体开源率 + BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + //获取开源率的阈值 + Integer openRateThreshold = analysisTask.getOpenRateThreshold(); + + //如果开源率大于阈值,则将当前文件设置成开源 + if (openRate.floatValue() > openRateThreshold) { + analysisFile.setOpenType(true); + } + + //保存当前文件的开源信息到mongo库中 + MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); + matchOpenFileMongo.setId(IdGenerator.uuid32()) + .setFilePath(analysisFile.getFileUrl()) + .setFileName(analysisFile.getName()) + .setFeatureSimilarity(featureSimilarity.floatValue()) + .setOpenRate(openRate.floatValue()) + .setOpenType(analysisFile.getOpenType()) + .setMatchOpenFile(matchOpenFilesRes); + + mongoTemplate.save(matchOpenFileMongo); + } + + + /** + * 计算当前文件的特征相似度 和 开源率 + * + * @param matchOpenFiles 通过MD5 匹配到的所有开源文件 + * @param sourceFileBaseCoreName 当前文件特征文件的 solr coreName + * @param matchLineRowsNum 所有开源文件匹配到的开源行号列表 + * @param matchFeatureFunctionMd5s 所有开源文件匹配到的特征函数MD5 + * return 匹配的开源文件解析后的结果集 + */ + private List calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set matchLineRowsNum, Set 
matchFeatureFunctionMd5s) { + + //匹配的开源文件列表 + List matchOpenFilesRes = new ArrayList<>(); + + //按照函数的特征md5进行分组,getter ,setter等方法的 特征值会重复 + Map> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5)); + + //首先根据文件的MD5查询开源文件的版本ID,和路径信息 + Set openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet()); + Map md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s); + + //根据版本ID查询版本的详细信息 + //todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versioinTree 后面需要优化 + Set openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet()); + List versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds); + Map versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity())); + + + //函数总行数 + BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum()); + + for (SolrDocument openSourceFile : matchOpenFiles) { + + //开源文件md5 + String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString(); + + //解析文件的函数特征值 + List openFileFunctionList = getOpenFileFunctionList(openSourceFile); + + //根据源文件的MD5确定需要查询源码库的序号 + String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; + + //获取开源文件的文本信息 + SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent"); + + //当前文件匹配特征函数总行数 + int currentFileMatchFeatureLineCount = 0; + + //当前文件所匹配的特征函数MD5 + Set currentFileMatchFeatureFunctionMd5 = new HashSet(); + + //遍历函数特征MD5 + for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) { + List currentFueatureFunctionList = 
featureMd5FunctionMap.get(funFeatureMd5); + //源文件的特征函数列表 + for (Function openFunction : openFileFunctionList) { + if (funFeatureMd5.equals(openFunction.getMd5())) { + //每个特征函数 不能多次匹配,影响整体特征相似度 + //匹配成功后,相同的特征行 一并加上 + if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) { + currentFileMatchFeatureFunctionMd5.add(funFeatureMd5); + matchFeatureFunctionMd5s.add(funFeatureMd5); + currentFileMatchFeatureLineCount += currentFueatureFunctionList.stream().mapToInt(Function::getCodeRowNum).sum(); + } + } + } + } + + //当前文件的开源率 + Pair> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString()); + //将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率 + matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); + + //统计当前文件的特征相似度 + BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); + VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); + //组装当前开源文件的开源项目信息 + MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); + matchOpenFileInfo.setPId(versionInfo.getProId()) + .setPName(versionInfo.getProName()) + .setSourceUrl((String) openEntries.get("fullPath")) + .setFeatureSimilarity(featureSimilarity.floatValue()) + .setOpenRate(openRateAndSaveRowNum.getKey()) + .setVersion(versionInfo.getVersionName()) + .setLicenseType(versionInfo.getLicenseType()) + .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()); + matchOpenFilesRes.add(matchOpenFileInfo); + } + return matchOpenFilesRes; + } + + + /** + * 防止函数特征库不全,再次根据文件MD5查询开源文件信息, 做二次校验 + * + * @param originalFileMd5 + * @param versionIdCoreName + */ + private void checkByOriginalFileMd5(String versionIdCoreName, String originalFileMd5) { + + //根据文件的MD5,查询特征库,看当前文件是否在开源代码库中 + SolrDocument versionIdAndPath 
= solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + originalFileMd5, "versionId,fullPath,sourceFileMd5"); + + if (versionIdAndPath != null) { + //根据版本ID查询版本的详细信息 + VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId")); + if (versionInfo != null) { + //当前开源文件的开源项目信息 + MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); + matchOpenFileInfo.setPId(versionInfo.getProId()) + .setPName(versionInfo.getProName()) + .setSourceUrl(versionInfo.getDownUrl()) + .setFeatureSimilarity(100.00f) + .setOpenRate(100.00f) + .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()); + + //保存当前文件的开源信息到mongo库中 + MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); + matchOpenFileMongo.setId(IdGenerator.uuid32()) + .setFilePath(analysisFile.getFileUrl()) + .setFileName(analysisFile.getName()) + .setOpenRate(100.00f) + .setOpenType(analysisFile.getOpenType()) + .setMatchOpenFile(Arrays.asList(matchOpenFileInfo)); + + mongoTemplate.save(matchOpenFileMongo); + } + } + } + + + /** + * 获取当前文件的函数特征值 + * + * @param matchOpenFile + * @return + */ + private List getOpenFileFunctionList(SolrDocument matchOpenFile) { + try { + //解析文件的函数特征值 + String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString(); + lineFeatureMd5s = lineFeatureMd5s.replace("\\", "") + .replace("\"{", "{") + .replace("}\"", "}"); + return JSONArray.parseArray(lineFeatureMd5s, Function.class); + }catch (Exception e){ + log.error("解析文件特征值失败",e); + } + return new ArrayList(); + } + + /** + * 将特征值插入到mongo库中 + * + * @param features 特征集合 + * @param lineDataMongoDto 当前分析任务 ,特征信息存储 + * @param + */ + @Deprecated + private void insertFeatureValue(List features, LineDataMongoDto lineDataMongoDto) { + List batchInsertList = new ArrayList<>(); + if (CollectionUtil.isNotEmpty(features)) { + //这里的批量插入逻辑可以进行校验 + //每10条存一次,解析的数据量如果过大,可能会超过MongoDB数据限制 + int batchInsertStpe = 10; + int total = 0; + for (int i = 0; i < features.size(); i++) { + 
/**
 * Common base type of the per-file analysis tasks.
 *
 * <p>It declares no members of its own; subclasses supply {@link Runnable#run()}.
 *
 * @author liuzongren
 * @date 2024/7/31
 */
public abstract class IAnalysisTask implements Runnable {

}
+import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.mongo.FileDataMongoDto; +import com.keyware.composeanalysis.mongo.LineDataMongoDto; +import com.keyware.composeanalysis.mongo.MatchOpenFile; +import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; +import com.keyware.composeanalysis.solr.VersionTree; +import com.keyware.composeanalysis.util.AnalysisLogUtil; +import com.keyware.composeanalysis.util.RedisUtil; +import com.keyware.composeanalysis.util.SolrUtils; +import com.keyware.composeanalysis.util.SpringContextUtils; +import com.keyware.keyswan.anaysis.Analysis; +import com.keyware.keyswan.anaysis.AnalysisFactory; +import com.keyware.keyswan.common.CodeFile; +import com.keyware.utils.IdGenerator; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.lang3.StringUtils; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Update; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.*; +import java.util.concurrent.CountDownLatch; + +import static org.springframework.data.mongodb.core.query.Criteria.where; + +/** + * @author liuzongren + * @ClassName LineAnalysisTask + * @description: 行级别 特征提取定时任务 + * @datetime 2024年 07月 25日 16:19 + * @version: 1.0 + */ + +@Log4j2 +public class LineAnalysisTask extends IAnalysisTask { + + private MongoTemplate mongoTemplate; + private AnalysisTask analysisTask; + //被测件的文件信息 + private FileDataMongoDto analysisFile; + + private SolrUtils solrUtils; + + private RedisUtil redisUtil; + + private CountDownLatch countDownLatch; + + public LineAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) { + this.mongoTemplate = mongoTemplate; + this.analysisTask = analysisTask; + this.analysisFile = analysisFile; + this.countDownLatch 
= countDownLatch; + this.solrUtils = SpringContextUtils.getBean(SolrUtils.class); + this.redisUtil = SpringContextUtils.getBean(RedisUtil.class); + } + + /** + * 行级别 源代码溯源 + * 当前任务 需要在 文件级分析完成后 进行 + */ + + @Override + public void run() { + //执行任务前,判断一下任务执行的状态 + Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId())); + if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) { + log.info("任务已取消,fileName:{}", analysisFile.getName()); + countDownLatch.countDown(); + return; + } + + //获取文件地址 + String filePath = analysisFile.getFileUrl(); + //获取文件名称 + String fileName = analysisFile.getName(); + + AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】正在提取" + fileName); + try { + LineDataMongoDto lineDataMongoDto = new LineDataMongoDto(); + lineDataMongoDto.setFileId(analysisFile.getId()) + .setStatus(0) + .setIsSelect(false); + Analysis analysis = AnalysisFactory.getAnalysis(filePath); + CodeFile codeFile = null; + + //获取文件行级特征md5 + codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT); + //每一行原内容MD5值集合 +// String cutFileLineMd5 = codeFile.getCutFileLineMd5(); + //每一行特征内容MD5值集合 + String traitFileLineMd5 = codeFile.getTraitFileLineMd5(); + + String[] featureMd5Arr = {}; + if (StringUtils.isNotBlank(traitFileLineMd5)) { + featureMd5Arr = traitFileLineMd5.split(","); + } + List lineFeatures = Arrays.asList(featureMd5Arr); + + //从solr中获取特征相似的 文件 + SolrDocumentList featureSimilarityFromSolr = getFeatureSimilarityFromSolr(lineFeatures); + + //计算文件的开源率 + calculateOpenRate(featureSimilarityFromSolr, lineFeatures); + + //更新文件表的分析状态为3 行级特征以分析完毕 + analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .replaceWith(analysisFile) + 
.findAndReplace(); + + AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】提取完成" + fileName); + log.info("文件" + fileName + ":行级分析完成"); + } catch (Exception e) { + AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【行级特征提取】提取失败" + fileName, e); + log.error("文件:" + fileName + "行级别特征提取失败!", e); + //修改当前文件分析状态未失败 + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("_id").is(analysisFile.getId())) + .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode())) + .first(); + } finally { + countDownLatch.countDown(); + } + } + + + /** + * 计算开源率 被测件的开源率 + * + * @param matcheOpenSourceFiles + * @param lineFeatures + */ + private void calculateOpenRate(SolrDocumentList matcheOpenSourceFiles, List lineFeatures) { + + if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) { + return; + } + + //根据文件后缀判断需要查询的文件版本库名称 + String versionIdCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); + + + //定义结果集对象 + MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); + matchOpenFileMongo.setId(IdGenerator.uuid32()) + .setFilePath(analysisFile.getFileUrl()) + .setFileName(analysisFile.getName()); + + //开源文件信息保存结果集 + List matchOpenFileInfoList = new ArrayList<>(); + + //保存所有匹配的行数信息,方便统计总的匹配行数 + Set matchingLineSet = new HashSet<>(); + + //获取文件总行数 + BigDecimal totalCodeRowNum = new BigDecimal(analysisFile.getCodeRowNum()); + + //统计每个开源文件和被测件的匹配行数 + for (SolrDocument matchFile : matcheOpenSourceFiles) { + //解析文件的代码块特征值 + String lineFeatureMd5s = (String) matchFile.get("tz_line_hay"); + List matchedLineFeatures = Arrays.asList(lineFeatureMd5s.split(",")); + + //匹配的总行数 + int currentFileMatchLineCount = 0; + + //遍历当前文件的代码块特征,统计匹配的总行数 + for (String originalLineFeatureMd5 : lineFeatures) { + for (String matchLineFeatureMd5 : matchedLineFeatures) { + if (originalLineFeatureMd5.equals(matchLineFeatureMd5)) { + currentFileMatchLineCount++; + matchingLineSet.add(originalLineFeatureMd5); + } + } + } + + 
//首先根据文件的MD5查询开源文件的版本ID,和路径信息 + SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + matchFile.get("sourceMd5"), "versionId,fullPath,sourceFileMd5"); + + //根据版本ID查询版本的详细信息 + //todo 这里 查询一个版本的信息 需要检索 两个 solr 库 而且还需要检索 versioinTree 后面需要优化 + VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId")); + + //计算与当前开源文件的开源率 + BigDecimal openRate = new BigDecimal(currentFileMatchLineCount).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); + + //当前开源文件的开源项目信息 + MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); + matchOpenFileInfo.setPId(versionInfo.getProId()) + .setPName(versionInfo.getProName()) + .setSourceUrl(versionInfo.getDownUrl()) + .setOpenRate(openRate.floatValue()) + .setVersion(versionInfo.getVersionName()) + .setLicenseType(versionInfo.getLicenseType()) + .setAnalyzeType(AnalysisLevelEnum.LINE_LEVEL.getCode()); + matchOpenFileInfoList.add(matchOpenFileInfo); + } + + //统计当前文件的整体开源率 + BigDecimal openRate = new BigDecimal(matchingLineSet.size()).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); + + //获取开源率的阈值 + Integer openRateThreshold = analysisTask.getOpenRateThreshold(); + + + //如果开源率大于阈值,则将当前文件设置成开源 + if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) { + analysisFile.setOpenType(true); + } + + //保存当前文件的开源信息 + matchOpenFileMongo.setOpenType(analysisFile.getOpenType()) + .setMatchOpenFile(matchOpenFileInfoList); + mongoTemplate.save(matchOpenFileMongo); + + } + + + /** + * 将特征值插入到mongo库中 + * + * @param features 特征集合 + * @param lineDataMongoDto 当前分析任务 ,特征信息存储 + * todo 后期 看看有没有插入的必要 + * @param + */ + @Deprecated + private void insertFeatureValue(String features, LineDataMongoDto lineDataMongoDto) { + String[] featureMd5Arr = {}; + if (StringUtils.isNotBlank(features)) { + featureMd5Arr = features.split(","); + } + List lineFeatures = Arrays.asList(featureMd5Arr); + List batchInsertList = new 
ArrayList<>(); + if (CollectionUtil.isNotEmpty(lineFeatures)) { + //这里的批量插入逻辑可以进行校验 + //每10条存一次,解析的数据量如果过大,可能会超过MongoDB数据限制 + int batchInsertStpe = 5000; + int total = 0; + for (int i = 0; i < lineFeatures.size(); i++) { + if (total != batchInsertStpe) { + batchInsertList.add(lineFeatures.get(i)); + total++; + } + if (i == lineFeatures.size() - 1 && total != batchInsertStpe) { + total = 0; + lineDataMongoDto.setId(IdGenerator.uuid32()) + .setLineFeatueMd5s(batchInsertList); + mongoTemplate.insert(lineDataMongoDto); + } + if (total == batchInsertStpe) { + total = 0; + lineDataMongoDto.setId(IdGenerator.uuid32()) + .setLineFeatueMd5s(batchInsertList); + mongoTemplate.insert(lineDataMongoDto); + batchInsertList.clear(); + } + } + } else { + lineDataMongoDto.setId(IdGenerator.uuid32()); + mongoTemplate.insert(lineDataMongoDto); + } + } + + + /** + * 根据 特征值 从特征库中检索 具有特征相似的 + * + * @param lineFeatureList 行特征信息 + * @return + */ + private SolrDocumentList getFeatureSimilarityFromSolr(List lineFeatureList) { + String solrCoreName = SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP; + //拼接行特征查询条件 + String queryStr = "tz_line_hay:(" + StringUtils.join(lineFeatureList, " OR ") + ")"; + log.info("查询条件: solrCoreName:{},queryStr:{}", solrCoreName, queryStr); + SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,tz_line_hay"); + log.info("查询结果: result:{}", result); + return result; + } + + +} diff --git a/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java b/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java new file mode 100644 index 0000000..4692050 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java @@ -0,0 +1,378 @@ +package com.keyware.composeanalysis.task; + +import cn.hutool.core.collection.CollectionUtil; +import com.google.common.collect.Sets; +import com.keyware.composeanalysis.constant.FixedValue; +import com.keyware.composeanalysis.constant.MongoDBConst; +import 
com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; +import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; +import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; +import com.keyware.composeanalysis.entity.AnalysisTask; +import com.keyware.composeanalysis.mongo.*; +import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl; +import com.keyware.composeanalysis.solr.VersionTree; +import com.keyware.composeanalysis.solr.VersionTreeNode; +import com.keyware.composeanalysis.util.AnalysisLogUtil; +import com.keyware.composeanalysis.util.SolrUtils; +import com.keyware.composeanalysis.util.SpringContextUtils; +import com.keyware.utils.IdGenerator; +import com.mongodb.client.MongoClient; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.solr.common.SolrDocument; +import org.springframework.core.task.TaskExecutor; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.query.Query; +import org.springframework.data.mongodb.core.query.Update; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; + + +import static org.springframework.data.mongodb.core.query.Criteria.where; + +/** + * @author liuzongren + * @date 2024/7/26 + * desc 项目级溯源分析任务,先将所有文件进行项目级匹配,匹配不中的文件在进行细致级别的匹配 + * 项目级匹配前,需要完成文件解压工作 + */ +@Log4j2 +public class PorjectAnalysisTask { + private MongoTemplate mongoTemplate; + private MongoTemplate keyswanDBTemplate; + private AnalysisTask analysisTask; + private AnalysisTaskServiceImpl analysisService; + private SolrUtils solrUtils; + private TaskExecutor taskExecutor; + + /** + * 项目级分析 + * + * @param mongoClient + * @param analysisTask + * @param solrUtils + * @param analysisService + */ + public PorjectAnalysisTask(MongoClient mongoClient, AnalysisTask 
analysisTask, SolrUtils solrUtils, AnalysisTaskServiceImpl analysisService) { + this.analysisService = analysisService; + keyswanDBTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_KEYSWAN); + this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); + this.analysisTask = analysisTask; + this.solrUtils = solrUtils; + this.taskExecutor = SpringContextUtils.getBean(TaskExecutor.class); + } + + + public void doAnalysis() { + try { + long startTime = System.currentTimeMillis(); + //首先从versionbasedata库中匹配当前被测件 + Boolean matchedPrject = matchByProjectMd5(); + + //从versionbase 中整体匹配不到项目信息, 拿项目的所有文件去匹配 solr库的versionTree去检索 + if (!matchedPrject) { + List unMatchedFiles = matchByAllFilesMd5(); + + //剩余没有匹配文件,用文件的md5去匹配solr库的versionTree + if (CollectionUtils.isNotEmpty(unMatchedFiles)) { + matchByFileMd5s(unMatchedFiles); + } + } + //todo 如果整体耗时较长,將matchOpenFileInfo存储到数据库的逻辑修改成异步的 + log.info("项目级分析完成,用时:" + (System.currentTimeMillis() - startTime) / 1000 + "s"); + } catch (Exception e) { + AnalysisLogUtil.insert(mongoTemplate, "成分分析失败:" + e.getStackTrace()); + log.error("项目级分析失败,项目名称:" + analysisTask.getFileName(), e); + analysisTask.setAnalysisStatus(AnalysisStatusEnum.FAIL_ANALYSIS.getCode()); + analysisService.updateById(analysisTask); + } + } + + //项目整体匹配, 查看整个项目是否开源 + private Boolean matchByProjectMd5() { + //判断当前项目整体是否开源,去版本库中匹配 + Query versionBaseQuery = new Query(where("MD5").is(analysisTask.getMd5())); + VersionbasedataMongoDto openSourceProject = keyswanDBTemplate.findOne(versionBaseQuery, VersionbasedataMongoDto.class); + //如果匹配中了开源项目,则将状态设置为开源,并将结果存储到数据库中 + if (openSourceProject != null) { + analysisTask.setOpenType(true); + //将匹配中的开源项目信息存入当前文件开源信息中 + ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto(); + projectAssembly.setId(IdGenerator.uuid32()) + .setFileCount(analysisTask.getFileCount()) + .setMatchFileCount(analysisTask.getFileCount()) + 
.setProjectId(openSourceProject.getProjectId()) + .setVersionId(openSourceProject.getVersionId()) + .setVersionName(openSourceProject.getVersionName()) + .setSemblance(100.00d) + .setOpenSourceUrl(openSourceProject.getDownloadUrl()); + //根据版本信息去查询项目名称和许可证信息 + Query projectBaseQuery = new Query(where("ID").is(openSourceProject.getProjectId())); + ProjectBaseDataMongoDto projectbasedata = keyswanDBTemplate.findOne(projectBaseQuery, ProjectBaseDataMongoDto.class); + if (projectbasedata != null) { + projectAssembly.setProjectName(projectbasedata.getName()); + if (StringUtils.isNotEmpty(projectbasedata.getLicenseType())) { + projectAssembly.setLicenseType(Arrays.asList(projectbasedata.getLicenseType())); + } + } + //当前文件开源信息存入数据库中 + mongoTemplate.insert(projectAssembly); + + analysisService.updateById(analysisTask); + + //更新文件分析的状态 + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("isDirectory").is(false)) + .apply(new Update().set("openType", true) + .set("openRate", 100.00d) + .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())) + .all(); + + //保存具体开源文件信息 + VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId()); + Query fileQuery = new Query(where("isDirectory").is(false)); + List fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class); + saveProjectOpenInfo(openProjectList, fileDataMongoDtos); + return true; + } + return false; + } + + //通过文件的md5 去匹配开源项目 + private List matchByAllFilesMd5() { + //检索当前项目的所有文件 + Query query = new Query(where("isDirectory").is(false)); + List projectFiles = mongoTemplate.query(FileDataMongoDto.class).matching(query).all(); + + //分多次拿所有文件匹配solr库 + Set matchedFileMd5Set = multipleMatchByAllFilesMd5(projectFiles); + + //统计未匹配的文件 + List unMatchedFiles = projectFiles.stream().filter(file -> !matchedFileMd5Set.contains(file.getMd5())).collect(Collectors.toList()); + return unMatchedFiles; + } + + //通过文件的md5去特征库匹配 + private void 
matchByFileMd5s(List unMatchedFiles) { + + //将文件按照后缀分组,方便查询solr库 + Map> allSuffixFiles = unMatchedFiles.stream().filter(file -> StringUtils.isNotEmpty(file.getSuffix())).collect(Collectors.groupingBy(FileDataMongoDto::getSuffix)); + + //统计语言的文件 + List otherLanguageFiles = new ArrayList<>(); + + //遍历主流32语言 + allSuffixFiles.forEach((suffix, data) -> { + //根据文件后缀名获取特征库名称 + if (FixedValue.SUFFIX_SOLR_VERSION.containsKey(suffix)) { + String currentCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(suffix); + //通过md5去*_SourceFileBase中匹配版本Id + Set fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); + Map md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s); + if (CollectionUtil.isEmpty(md5VersionObjMap)) { + return; + } + //保存结果数据 + saveMatchOpenFileInfo(md5VersionObjMap, data); + } else { + //非主流语言的,没有单独的特征库,统一到默认的特征库进行检索 + otherLanguageFiles.addAll(data); + } + }); + + //将无后缀的文件 归纳于 处理非32种语言的文件 + List noSuffixFiles = unMatchedFiles.stream().parallel().filter(file -> StringUtils.isEmpty(file.getSuffix())).collect(Collectors.toList()); + otherLanguageFiles.addAll(noSuffixFiles); + + if (CollectionUtils.isNotEmpty(otherLanguageFiles)) { + //非32种语言的会分2种MD5 + //暂时忽略字符流md5的匹配,因为大部分都是一样的 + Set fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); + Map md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s); + if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) { + //如果没有匹配到,直接更新文件分析状态已完成,因为非32种语言的文件,无法进行解析,通过源文件的MD5匹配不到,就匹配不到了,无需进行下一步的匹配 + updateFileAnalysisStatus(fileMd5s); + return; + } + saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles); + //直接更改没有匹配的文件分析状态 + updateFileAnalysisStatus(Sets.difference(fileMd5s, md5VersionIdMap.keySet())); + } + + } + + + //匹配到开源文件后,补充详细信息然后保存到mongo中 + private void saveMatchOpenFileInfo(Map md5VersionIdMap, List originalFiles) { + List 
batchInsertCache = new ArrayList<>(); + //根据版本id查询版本的详细信息 + //todo 这段逻辑如果耗时的话,可以异步处理 补充文件的版本信息 + Set versionIds = md5VersionIdMap.values().stream().map(doc->(String)doc.get("versionId")).collect(Collectors.toSet()); + List versionInfos = solrUtils.queryBatchVersionInfoByVersionIds(versionIds); + Map versionTreeMap = versionInfos.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity(), (key1, key2) -> key1)); + Map fileMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1)); + md5VersionIdMap.forEach((fileMd5, versionObj) -> { + String versionId = (String) versionObj.get("versionId"); + VersionTree versionInfo = versionTreeMap.get(versionId); + if (versionInfo == null){ + log.error("根据versionId,未在versionTree中找到版本信息,fileMd5:{},versionId:{}",fileMd5, versionId); + return; + } + FileDataMongoDto fileDataMongoDto = fileMd5ObjMap.get(fileMd5); + MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString()); + batchInsertCache.add(matchOpenFile); + }); + + if (CollectionUtils.isNotEmpty(batchInsertCache)) { + mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); + //更新文件分析的状态 + updateFileAnalysisStatus(md5VersionIdMap.keySet()); + } + } + + + //匹配到开源项目后,保存各个文件的开源信息 + private void saveProjectOpenInfo(VersionTree versionInfo, List originalFiles) { + Map originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1)); + Set matchedMd5s = new HashSet<>(); + List batchInsertCache = new ArrayList<>(); + List fileInfos = versionInfo.getDirTree(); + + fileInfos.forEach(versionTreeNodeObj->{ + String openFileMd5 = versionTreeNodeObj.getSourceFileMd5(); + //看是否和被测件的md5匹配 + if (originalMd5ObjMap.keySet().contains(openFileMd5)) { + //匹配的文件只保存一次 + if (!matchedMd5s.contains(openFileMd5)) { + MatchOpenFileMongoDto matchOpenFile = 
getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5),versionTreeNodeObj.getFullPath()); + batchInsertCache.add(matchOpenFile); + matchedMd5s.add(openFileMd5); + } + } + //分批保存,防止单个项目太大,撑爆内存 或 超过 mongodb最大插入数 + if (batchInsertCache.size() >= 1000) { + mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); + batchInsertCache.clear(); + } + }); + + if (batchInsertCache.size() != 0) { + mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class); + } + + //更新文件分析的状态 + updateFileAnalysisStatus(matchedMd5s); + } + + //获取匹配到的开源文件信息 + private MatchOpenFileMongoDto getMatchOpenFile(VersionTree versionInfo, FileDataMongoDto originalFile,String openFilePath) { + //设置匹配文件的信息 + MatchOpenFile matchOpenFile = new MatchOpenFile(); + matchOpenFile.setId(IdGenerator.uuid32()) + .setVersionId(versionInfo.getVersionId()) + .setSourceFilePath(openFilePath) + .setSourceUrl(versionInfo.getDownUrl()) + .setPId(versionInfo.getProId()) + .setPName(versionInfo.getProName()) + .setLicenseType(versionInfo.getLicenseType()) + .setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()) + .setVersion(versionInfo.getVersionName()) + .setFeatureSimilarity(100.00f) + .setOpenRate(100.00f); + + //创建当前文件与开源代码的匹配信息 + MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto(); + matchOpenFileInfo.setId(IdGenerator.uuid32()) + .setFileName(originalFile.getName()) + .setFilePath(originalFile.getFileUrl()) + .setOpenType(originalFile.getOpenType()) + .setFeatureSimilarity(100.00f) + .setOpenRate(100.00f) + .setMatchOpenFile(Arrays.asList(matchOpenFile)); + return matchOpenFileInfo; + } + + //匹配拿所有文件的md5去versionTree中,需要分多次匹配,单次匹配多个结果集的话,会导致solr响应长时间阻塞 + //多次匹配,每次匹配上一次未匹配种的文件 + //todo 这里需要设置一个阈值,一共匹配多少次,或者当相似度达到多少的时候,停止整体匹配 + //目前默认查询三次 + private Set multipleMatchByAllFilesMd5(List projectFiles) { + + //获取被测件所有文件的md5 + Set projectFilesMd5 = projectFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); + + //匹配到的文件md5存入set中 + Set matchedFileMd5Set = 
new HashSet<>(); + + //剩余未匹配中的文件md5 + Set unMatchedFileMd5s = projectFilesMd5; + + //循环匹配5次,进行整体的文件匹配 + for (int i = 0; i < 5; i++){ + //检索versionTree库 + String queryStr = "dirTree:(" + StringUtils.join(unMatchedFileMd5s, " OR ") + ")"; + log.info("versionTree queryStr: " + queryStr); + long startTime = System.currentTimeMillis(); + VersionTree openProject = solrUtils.queryVersionTree(queryStr); + log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000); + //如果存在没有匹配到开源数据的情况,直接退出循环匹配 + if (openProject == null){ + break; + } + + //异步保存匹配的开源文件信息 + taskExecutor.execute(() -> saveProjectOpenInfo(openProject, projectFiles)); + + //获取开源项目的所有文件md5集合 + List openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList()); + //获取被测件和开源项目相同的文件 + Set matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet()); + + //保存已匹配的文件md5,后续需要统计整体的开源率 + matchedFileMd5Set.addAll(matchedFiles); + + //计算与当前项目的相似度 + BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); + + //当相似度小于30%,不保存项目级的信息 + if (semblance.compareTo(new BigDecimal(30)) < 0){ + break; + } + + ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto(); + projectAssembly.setId(IdGenerator.uuid32()) + .setFileCount(openFilesMd5.size()) + .setMatchFileCount(matchedFiles.size()) + .setProjectId(openProject.getProId()) + .setProjectName(openProject.getProName()) + .setVersionName(openProject.getVersionName()) + .setOpenSourceUrl(openProject.getDownUrl()) + .setSemblance(semblance.doubleValue()); + mongoTemplate.insert(projectAssembly); + + //获取未匹配中的文件md5,更新下次匹配的md5集合 + unMatchedFileMd5s = Sets.difference(unMatchedFileMd5s, matchedFiles); + //如果没有剩余未匹配文件,退出整体匹配 + if (CollectionUtils.isEmpty(unMatchedFileMd5s) ) { + break; + } + } + return matchedFileMd5Set; + } + + 
//更新文件分析的状态 + private void updateFileAnalysisStatus(Set fileMd5Set) { + mongoTemplate.update(FileDataMongoDto.class) + .matching(where("md5").in(fileMd5Set)) + .apply(new Update().set("openType", true) + .set("openRate", 100.00f) + .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())) + .all(); + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java b/src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java new file mode 100644 index 0000000..bff61e7 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java @@ -0,0 +1,33 @@ +package com.keyware.composeanalysis.util; + +import cn.hutool.core.date.DateTime; +import com.keyware.composeanalysis.mongo.AnalysisLogMongoDto; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.scheduling.annotation.Async; + +import java.io.PrintWriter; +import java.io.StringWriter; + +/** + * @author liuzongren + * @date 2024/7/30 + * @description 分析日志工具 + */ +public class AnalysisLogUtil { + + public static void insert(MongoTemplate mongoTemplate,String logInfo) { + mongoTemplate.insert(new AnalysisLogMongoDto().setLogInfo(logInfo).setCreateTime(new DateTime())); + } + + public static void insertErrorInfo(MongoTemplate mongoTemplate, String logInfo, Exception e) { + mongoTemplate.insert(new AnalysisLogMongoDto().setLogInfo(logInfo + getErrorMsg(e)).setCreateTime(new DateTime())); + } + + + private static String getErrorMsg(Exception e) { + StringWriter errors = new StringWriter(); + e.printStackTrace(new PrintWriter(errors)); + return errors.toString(); + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/util/BeanUtil.java b/src/main/java/com/keyware/composeanalysis/util/BeanUtil.java new file mode 100644 index 0000000..c44c002 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/BeanUtil.java @@ -0,0 +1,32 @@ +package com.keyware.composeanalysis.util; + +import 
com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.keyware.composeanalysis.solr.VersionTree; +import com.keyware.composeanalysis.solr.VersionTreeNode; +import org.apache.solr.common.SolrDocument; + +import java.util.List; + +/** + * @author liuzongren + * @date 2024/8/7 + * @description dom 转 entity + */ +public class BeanUtil { + + public static VersionTree domToVersionTree(SolrDocument dom) { + String dirTree = String.valueOf(dom.get("dirTree")); + dirTree = dirTree.replace("\\", ""); + dirTree = dirTree.replace("\"{", "{"); + dirTree = dirTree.replace("}\"", "}"); + dom.put("dirTree", null); + JSONArray treeArray = JSON.parseArray(dirTree); + List treeList = treeArray.toJavaList(VersionTreeNode.class); + String domObj = JSON.toJSONString(dom); + VersionTree versionTree = JSON.parseObject(domObj, VersionTree.class); + versionTree.setDirTree(treeList); + return versionTree; + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java b/src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java new file mode 100644 index 0000000..44e4fa5 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java @@ -0,0 +1,38 @@ +package com.keyware.composeanalysis.util; + +import com.alibaba.fastjson.JSON; +import com.mongodb.BasicDBObject; +import org.bson.Document; +import org.bson.json.JsonWriterSettings; + +/** + * @author liuzongren + * @date 2024/7/24 + * 类型转化工具类 + */ +public class ConvertUtil { + + public T documentToBean(BasicDBObject dbObject, Class clzss) { + String realJson = dbObject.toJson(JsonWriterSettings.builder().build()); + T obj = JSON.parseObject(realJson, clzss); + return obj; + } + + public static T documentToBean(Document document, Class clzss) { + String realJson = document.toJson(JsonWriterSettings.builder().build()); + T obj = JSON.parseObject(realJson, clzss); + return obj; + } + + public static BasicDBObject toDBObject(T object) { + String json = 
JSON.toJSONString(object); + BasicDBObject basicDBObject = BasicDBObject.parse(json); + return basicDBObject; + } + + public static Document beanToDocument(T object) { + String json = JSON.toJSONString(object); + Document document = Document.parse(json); + return document; + } +} diff --git a/src/main/java/com/keyware/composeanalysis/util/IpUtil.java b/src/main/java/com/keyware/composeanalysis/util/IpUtil.java new file mode 100644 index 0000000..e299588 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/IpUtil.java @@ -0,0 +1,23 @@ +package com.keyware.composeanalysis.util; + +import lombok.extern.log4j.Log4j2; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +/** + * @author liuzongren + * @date 2024/7/30 + */ +@Log4j2 +public class IpUtil { + + public static String getHostIp() { + try { + return InetAddress.getLocalHost().getHostAddress(); + } catch (UnknownHostException e) { + log.error(e.getMessage(), e); + } + return "127.0.0.1"; + } +} diff --git a/src/main/java/com/keyware/composeanalysis/util/RedisUtil.java b/src/main/java/com/keyware/composeanalysis/util/RedisUtil.java new file mode 100644 index 0000000..c9b02a8 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/RedisUtil.java @@ -0,0 +1,537 @@ +package com.keyware.composeanalysis.util; + + +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.data.redis.core.RedisTemplate; +import org.springframework.stereotype.Component; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + * liuzongren + * data 2024/04/02 + */ +@Component +public class RedisUtil { + + + @Autowired + private RedisTemplate redisTemplate; + + /** + * 给一个指定的 key 值附加过期时间 + * + * @param key + * @param time + * @return + */ + public boolean expire(String key, long time) { + return this.redisTemplate.expire(key, time, 
TimeUnit.SECONDS); + } + + /** + * 根据key 获取过期时间 + * + * @param key + * @return + */ + public long getTime(String key) { + return redisTemplate.getExpire(key, TimeUnit.SECONDS); + } + + /** + * 根据key 获取过期时间 + * + * @param key + * @return + */ + public boolean hasKey(String key) { + return redisTemplate.hasKey(key); + } + + /** + * 移除指定key 的过期时间 + * + * @param key + * @return + */ + public boolean persist(String key) { + return redisTemplate.boundValueOps(key).persist(); + } + + //- - - - - - - - - - - - - - - - - - - - - String类型 - - - - - - - - - - - - - - - - - - - - + + /** + * 根据key获取值 + * + * @param key 键 + * @return 值 + */ + public Object get(String key) { + return key == null ? null : redisTemplate.opsForValue().get(key); + } + + /** + * 将值放入缓存 + * + * @param key 键 + * @param value 值 + * @return true成功 false 失败 + */ + public void set(String key, Object value) { + redisTemplate.opsForValue().set(key, value); + } + + /** + * 将值放入缓存并设置时间 + * + * @param key 键 + * @param value 值 + * @param time 时间(秒) -1为无期限 + * @return true成功 false 失败 + */ + public void set(String key, String value, long time) { + if (time > 0) { + redisTemplate.opsForValue().set(key, value, time, TimeUnit.SECONDS); + } else { + redisTemplate.opsForValue().set(key, value); + } + } + + public void delKey(String key) { + redisTemplate.delete(key); + } + + /** + * 批量添加 key (重复的键会覆盖) + * + * @param keyAndValue + */ + public void batchSet(Map keyAndValue) { + redisTemplate.opsForValue().multiSet(keyAndValue); + } + + /** + * 批量添加 key-value 只有在键不存在时,才添加 + * map 中只要有一个key存在,则全部不添加 + * + * @param keyAndValue + */ + public void batchSetIfAbsent(Map keyAndValue) { + redisTemplate.opsForValue().multiSetIfAbsent(keyAndValue); + } + + /** + * 对一个 key-value 的值进行加减操作, + * 如果该 key 不存在 将创建一个key 并赋值该 number + * 如果 key 存在,但 value 不是长整型 ,将报错 + * + * @param key + * @param number + */ + public Long increment(String key, long number) { + return redisTemplate.opsForValue().increment(key, number); + } + + /** + * 对一个 
key-value 的值进行加减操作, + * 如果该 key 不存在 将创建一个key 并赋值该 number + * 如果 key 存在,但 value 不是 纯数字 ,将报错 + * + * @param key + * @param number + */ + public Double increment(String key, double number) { + return redisTemplate.opsForValue().increment(key, number); + } + + //- - - - - - - - - - - - - - - - - - - - - set类型 - - - - - - - - - - - - - - - - - - - - + + /** + * 将数据放入set缓存 + * + * @param key 键 + * @return + */ + public void sSet(String key, String value) { + redisTemplate.opsForSet().add(key, value); + } + + /** + * 获取变量中的值 + * + * @param key 键 + * @return + */ + public Set members(String key) { + return redisTemplate.opsForSet().members(key); + } + + /** + * 随机获取变量中指定个数的元素 + * + * @param key 键 + * @param count 值 + * @return + */ + public void randomMembers(String key, long count) { + redisTemplate.opsForSet().randomMembers(key, count); + } + + /** + * 随机获取变量中的元素 + * + * @param key 键 + * @return + */ + public Object randomMember(String key) { + return redisTemplate.opsForSet().randomMember(key); + } + + /** + * 弹出变量中的元素 + * + * @param key 键 + * @return + */ + public Object pop(String key) { + return redisTemplate.opsForSet().pop("setValue"); + } + + /** + * 获取变量中值的长度 + * + * @param key 键 + * @return + */ + public long size(String key) { + return redisTemplate.opsForSet().size(key); + } + + /** + * 根据value从一个set中查询,是否存在 + * + * @param key 键 + * @param value 值 + * @return true 存在 false不存在 + */ + public boolean sHasKey(String key, Object value) { + return redisTemplate.opsForSet().isMember(key, value); + } + + /** + * 检查给定的元素是否在变量中。 + * + * @param key 键 + * @param obj 元素对象 + * @return + */ + public boolean isMember(String key, Object obj) { + return redisTemplate.opsForSet().isMember(key, obj); + } + + /** + * 转移变量的元素值到目的变量。 + * + * @param key 键 + * @param value 元素对象 + * @param destKey 元素对象 + * @return + */ + public boolean move(String key, String value, String destKey) { + return redisTemplate.opsForSet().move(key, value, destKey); + } + + /** + * 批量移除set缓存中元素 + * + * 
@param key 键 + * @param values 值 + * @return + */ + public void remove(String key, Object... values) { + redisTemplate.opsForSet().remove(key, values); + } + + /** + * 通过给定的key求2个set变量的差值 + * + * @param key 键 + * @param destKey 键 + * @return + */ + public Set difference(String key, String destKey) { + return redisTemplate.opsForSet().difference(key, destKey); + } + + + //- - - - - - - - - - - - - - - - - - - - - hash类型 - - - - - - - - - - - - - - - - - - - - + + /** + * 加入缓存 + * + * @param key 键 + * @param map 键 + * @return + */ + public void add(String key, Map map) { + redisTemplate.opsForHash().putAll(key, map); + } + + /** + * 获取 key 下的 所有 hashkey 和 value + * + * @param key 键 + * @return + */ + public Map getHashEntries(String key) { + return redisTemplate.opsForHash().entries(key); + } + + /** + * 验证指定 key 下 有没有指定的 hashkey + * + * @param key + * @param hashKey + * @return + */ + public boolean hashKey(String key, String hashKey) { + return redisTemplate.opsForHash().hasKey(key, hashKey); + } + + /** + * 获取指定key的值string + * + * @param key 键 + * @param key2 键 + * @return + */ + public String getMapString(String key, String key2) { + return redisTemplate.opsForHash().get("map1", "key1").toString(); + } + + /** + * 获取指定的值Int + * + * @param key 键 + * @param key2 键 + * @return + */ + public Integer getMapInt(String key, String key2) { + return (Integer) redisTemplate.opsForHash().get("map1", "key1"); + } + + /** + * 弹出元素并删除 + * + * @param key 键 + * @return + */ + public String popValue(String key) { + return redisTemplate.opsForSet().pop(key).toString(); + } + + /** + * 删除指定 hash 的 HashKey + * + * @param key + * @param hashKeys + * @return 删除成功的 数量 + */ + public Long delete(String key, String... 
hashKeys) { + return redisTemplate.opsForHash().delete(key, hashKeys); + } + + /** + * 给指定 hash 的 hashkey 做增减操作 + * + * @param key + * @param hashKey + * @param number + * @return + */ + public Long increment(String key, String hashKey, long number) { + return redisTemplate.opsForHash().increment(key, hashKey, number); + } + + /** + * 给指定 hash 的 hashkey 做增减操作 + * + * @param key + * @param hashKey + * @param number + * @return + */ + public Double increment(String key, String hashKey, Double number) { + return redisTemplate.opsForHash().increment(key, hashKey, number); + } + + /** + * 获取 key 下的 所有 hashkey 字段 + * + * @param key + * @return + */ + public Set hashKeys(String key) { + return redisTemplate.opsForHash().keys(key); + } + + /** + * 获取指定 hash 下面的 键值对 数量 + * + * @param key + * @return + */ + public Long hashSize(String key) { + return redisTemplate.opsForHash().size(key); + } + + //- - - - - - - - - - - - - - - - - - - - - list类型 - - - - - - - - - - - - - - - - - - - - + + /** + * 在变量左边添加元素值 + * + * @param key + * @param value + * @return + */ + public void leftPush(String key, Object value) { + redisTemplate.opsForList().leftPush(key, value); + } + + /** + * 获取集合指定位置的值。 + * + * @param key + * @param index + * @return + */ + public Object index(String key, long index) { + return redisTemplate.opsForList().index("list", 1); + } + + /** + * 获取指定区间的值。 + * + * @param key + * @param start + * @param end + * @return + */ + public List range(String key, long start, long end) { + return redisTemplate.opsForList().range(key, start, end); + } + + /** + * 把最后一个参数值放到指定集合的第一个出现中间参数的前面, + * 如果中间参数值存在的话。 + * + * @param key + * @param pivot + * @param value + * @return + */ + public void leftPush(String key, String pivot, String value) { + redisTemplate.opsForList().leftPush(key, pivot, value); + } + + /** + * 向左边批量添加参数元素。 + * + * @param key + * @param values + * @return + */ + public void leftPushAll(String key, String... 
values) { +// redisTemplate.opsForList().leftPushAll(key,"w","x","y"); + redisTemplate.opsForList().leftPushAll(key, values); + } + + /** + * 向集合最右边添加元素。 + * + * @param key + * @param value + * @return + */ + public void leftPushAll(String key, String value) { + redisTemplate.opsForList().rightPush(key, value); + } + + /** + * 向左边批量添加参数元素。 + * + * @param key + * @param values + * @return + */ + public void rightPushAll(String key, String... values) { + //redisTemplate.opsForList().leftPushAll(key,"w","x","y"); + redisTemplate.opsForList().rightPushAll(key, values); + } + + /** + * 向已存在的集合中添加元素。 + * + * @param key + * @param value + * @return + */ + public void rightPushIfPresent(String key, Object value) { + redisTemplate.opsForList().rightPushIfPresent(key, value); + } + + /** + * 向已存在的集合中添加元素。 + * + * @param key + * @return + */ + public long listLength(String key) { + return redisTemplate.opsForList().size(key); + } + + /** + * 移除集合中的左边第一个元素。 + * + * @param key + * @return + */ + public void leftPop(String key) { + redisTemplate.opsForList().leftPop(key); + } + + /** + * 移除集合中左边的元素在等待的时间里,如果超过等待的时间仍没有元素则退出。 + * + * @param key + * @return + */ + public void leftPop(String key, long timeout, TimeUnit unit) { + redisTemplate.opsForList().leftPop(key, timeout, unit); + } + + /** + * 移除集合中右边的元素。 + * + * @param key + * @return + */ + public void rightPop(String key) { + redisTemplate.opsForList().rightPop(key); + } + + /** + * 移除集合中右边的元素在等待的时间里,如果超过等待的时间仍没有元素则退出。 + * + * @param key + * @return + */ + public void rightPop(String key, long timeout, TimeUnit unit) { + redisTemplate.opsForList().rightPop(key, timeout, unit); + } +} \ No newline at end of file diff --git a/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java b/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java new file mode 100644 index 0000000..b920f98 --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java @@ -0,0 +1,206 @@ +package 
com.keyware.composeanalysis.util; + +import cn.hutool.core.lang.Pair; +import cn.hutool.core.util.ArrayUtil; +import cn.hutool.core.util.ByteUtil; +import cn.hutool.core.util.StrUtil; +import io.micrometer.common.util.StringUtils; +import org.apache.commons.collections.CollectionUtils; + +import java.math.BigDecimal; +import java.math.RoundingMode; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; + +public class SimilarityUtil { + + + + private SimilarityUtil() { + + } + + public static void main(String[] args) { + String s1=""; + String s2=""; + double similarity = getSimilarityMe(s1, s2); + System.out.println(similarity); + } + + /** + * 获得两个文件的相似度 + * @param sentence1 + * @param sentence2 + * @return + */ + public static double getSimilarityMe(String sentence1, String sentence2) { + //被测件文件行 + List sent1Words = getSplitWords(sentence1); + if (sentence1.length()==0){ + return 0.00; + } + //溯源到文件行 + HashSet sent2Words = getSplitWords1(sentence2); + //匹配到的行数 + double count=0; + for (String sent1Word : sent1Words) { + if (sent2Words.contains(sent1Word)){ + count++; + } + } + return count/sent1Words.size(); + } + + + /** + * 获取开源率和开源行号 + * @param analysisFile 被测件内容 + * @param openSourceFile 开源文件内容 + * @return + */ +// public static Pair> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) { +// if (StrUtil.hasBlank(analysisFile,openSourceFile)){ +// return new Pair<>(0.00f,new HashSet<>()); +// } +// //匹配到的行号 +// HashSet matchedRowsNum = new HashSet<>(); +// +// //被测件文件行 +// List analysisFileLineInfo = getSplitWords(analysisFile); +// +// //溯源到文件行 +// HashSet openSourceFileLineInfo = getSplitWords1(openSourceFile); +// +// for (int i = 0; i < analysisFileLineInfo.size(); i++) { +// String sent1Word = analysisFileLineInfo.get(i); +// if (openSourceFileLineInfo.contains(sent1Word)) { +// matchedRowsNum.add(i); +// } +// } +// +// //计算开源率 +// BigDecimal openRate = new 
BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); +// +// return new Pair<>(openRate.floatValue(), matchedRowsNum); +// } + + +// public static Pair> getOpenRateAndSaveRowNum(byte[] analysisFile, byte[] openSourceFile) { +// if (ArrayUtil.hasNull(analysisFile,openSourceFile)){ +// return new Pair<>(0.00f,new HashSet<>()); +// } +// //匹配到的行号 +// HashSet matchedRowsNum = new HashSet<>(); +// +// //被测件文件行 +// List analysisFileLineInfo = getSplitWords(new String(analysisFile)); +// +// //溯源到文件行 +// HashSet openSourceFileLineInfo = getSplitWords1(new String(openSourceFile)); +// +// for (int i = 0; i < analysisFileLineInfo.size(); i++) { +// String sent1Word = analysisFileLineInfo.get(i); +// if (openSourceFileLineInfo.contains(sent1Word)) { +// matchedRowsNum.add(i); +// } +// } +// +// //计算开源率 +// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); +// +// return new Pair<>(openRate.floatValue(), matchedRowsNum); +// } + + public static Pair> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) { + if (StrUtil.hasBlank(analysisFile,openSourceFile)){ + return new Pair<>(0.00f,new HashSet<>()); + } + //匹配到的行号 + HashSet matchedRowsNum = new HashSet<>(); + + //被测件文件行 + List analysisFileLineInfo = getSplitWords(analysisFile); + + //溯源到文件行 + HashSet openSourceFileLineInfo = getSplitWords1(openSourceFile); + + for (int i = 0; i < analysisFileLineInfo.size(); i++) { + String sent1Word = analysisFileLineInfo.get(i); + if (openSourceFileLineInfo.contains(sent1Word)) { + matchedRowsNum.add(i); + } + } + + //计算开源率 + BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); + + return new Pair<>(openRate.floatValue(), 
matchedRowsNum); + } + + + /** + * 获得两个文件的相似度,并将被匹配的行 + * @param matchLineInfos 被匹配的行信息 + * @param sentence2 开源文件内容 + * @return + */ + public static double getSimilarityAndSaveRowNum(List matchLineInfos, String sentence2,HashSet matchRows) { + if (CollectionUtils.isEmpty(matchLineInfos)){ + return 0.00d; + } + //溯源到文件行 + HashSet sent2Words = getSplitWords1(sentence2); + //匹配到的行数 + double count = 0d; + for (int i = 0; i < matchLineInfos.size(); i++) { + String lineContents = matchLineInfos.get(i); + if (sent2Words.contains(lineContents)) { + //保存匹配中的行序号 + matchRows.add(i); + count++; + } + } + return count / matchLineInfos.size(); + } + + + + public static List getSplitWords(String sentence) { + List lineList = new ArrayList(); + if (StringUtils.isBlank(sentence)){ + return lineList; + } + sentence = sentence.replaceAll("\n\r", "\n").replaceAll("\r\n", "\n").replaceAll("\r", "\n"); + List list = Arrays.asList(sentence.split("\n")); + for (String string : list) { + if (string != null && !"".equals(string.trim())) { + lineList.add(string.replaceAll(" ","")); + } + } + return lineList; + +// // 去除掉html标签 +// +// sentence = Jsoup.parse(sentence.replace(" ","")).body().text(); +// +// +// // 标点符号会被单独分为一个Term,去除之 +// +// return HanLP.segment(sentence).stream().map(a -> a.word).filter(s -> !"`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()——|{}【】‘;:”“'。,、? 
".contains(s)).collect(Collectors.toList()); + + } + private static HashSet getSplitWords1(String sentence) { + HashSet set = new HashSet<>(); + sentence = sentence.replaceAll("\n\r", "\n").replaceAll("\r\n", "\n").replaceAll("\r", "\n"); + List list = Arrays.asList(sentence.split("\n")); + for (String string : list) { + if (string != null && !"".equals(string.trim())) { + set.add(string.replaceAll(" ","")); + } + } + return set; + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java b/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java new file mode 100644 index 0000000..342aa6e --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/SolrUtils.java @@ -0,0 +1,321 @@ +package com.keyware.composeanalysis.util; + +import com.keyware.composeanalysis.constant.MongoDBConst; +import com.keyware.composeanalysis.solr.VersionTree; +import lombok.Data; +import lombok.extern.log4j.Log4j2; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrRequest; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.params.*; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; + + +/** + * 注意: 注意: 注意 + * solr同步更新专用 solr地址以solr + * + * @author liuzongren + */ +@Log4j2 +@Component +@Data +public class SolrUtils { + + @Value("${solr.solrUrl}") + private String clientUrl; + + @Value("${solr.row}") + private String ROWS; + + //源码上传和解压的地址 + @Value("${codeResourcePath}") + private String 
codeResourcePath; + + private String fileAndFunSolrUrl; + + //client 连接池 + private Map coreClientMap = new HashMap<>(); + + + /** + * @param coreName 表名 + * @return todo 这里的client 不知道是否支持并发,后续需要测试优化 + * @describe 获取solr连接 + */ + public HttpSolrClient getClient(String coreName) { + if (coreClientMap.containsKey(coreName)) { + return coreClientMap.get(coreName); + } else { + HttpSolrClient solr = new HttpSolrClient.Builder(clientUrl + "" + coreName) + .withConnectionTimeout(6000000) + .withSocketTimeout(6000000) + .allowCompression(true) + .build(); + coreClientMap.put(coreName, solr); + return solr; + } + } + + + /** + * 简单查询,指定返回字段 + * + * @param searchContent 检索内容 + * @param returneFields 返回字段 + * @return + * @throws Exception + */ + public SolrDocumentList query(String coreName, String searchContent, String returneFields) { + SolrDocumentList docsList = null; + try { + HttpSolrClient client = getClient(coreName); + Map map = new HashMap(); + map.put(CommonParams.Q, searchContent); + map.put(CommonParams.FL, returneFields); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS, ROWS); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, SolrRequest.METHOD.POST); + if (!query.getResults().isEmpty()){ + docsList = query.getResults(); + } + } catch (SolrServerException | IOException e) { + log.error("solr查询失败,coreName:{},queryStr:{}", coreName, searchContent, e); + } + return docsList; + } + + + + /** + * 根据文件的MD5 去*_SourceFileBase获取当前文件的版本ID + * @param coreName solrCoreName + * @param originalFileMd5s 需要检索的文件md5 + * todo 1.这里有一个极端的情况:如果查询的文件数量过多,返回值不知道会不会过大 + * todo 2.这里没有查询出dirTreeId, 下一步并没有从VersionTree中查询出当前文件的具体信息,只是从versionTree查询出版本信息 + * @return + */ + public Map batchQueryVersionIdFromSourceFileBaseBySourceMd5(String coreName, Set originalFileMd5s) { + String queryStr = "sourceFileMd5:(" + StringUtils.join(originalFileMd5s, " OR ") + ")"; + Map openFileMd5VersionIdMap = new HashMap<>(); + long strtTime = 
System.currentTimeMillis(); + log.info("batchQueryVersionIdFromSourceFileBaseBySourceMd5 queryStr:{},size:{}", queryStr, originalFileMd5s.size()); + try { + HttpSolrClient client = getClient(coreName); + Map map = new HashMap<>(); + map.put(CommonParams.Q, queryStr); + map.put(CommonParams.FL, "sourceFileMd5,versionId,fullPath"); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS, String.valueOf(originalFileMd5s.size())); + //分组查询,某一个开源文件匹配一次即可 + //todo 这里把匹配次数也查询出来了,貌似还是扫描了很多文档,看是否还有方法只匹配一次的 + map.put(GroupParams.GROUP,"true"); + map.put(GroupParams.GROUP_FIELD, "sourceFileMd5"); + map.put(GroupParams.GROUP_LIMIT,"1"); + map.put(GroupParams.GROUP_FORMAT,"simple"); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, SolrRequest.METHOD.POST); + if (query.getGroupResponse().getValues().size() > 0){ + //拿到sourceFileMd5分组数据 + SolrDocumentList result = query.getGroupResponse().getValues().get(0).getValues().get(0).getResult(); + openFileMd5VersionIdMap = result.stream().collect(Collectors.toMap(doc -> (String) doc.get("sourceFileMd5"), Function.identity())); + } + } catch (Exception e) { + log.error("solr查询失败,coreName:{},queryStr:{}", coreName, queryStr, e); + } + log.info("batchQueryVersionIdFromSourceFileBaseBySourceMd5 cost:{}s", (System.currentTimeMillis()-strtTime) / 1000); + return openFileMd5VersionIdMap; + } + + + + + + + /** + * 简单查询,指定返回字段 + * + * @param searchContent 检索内容 + * @param returneFields 返回字段 + * @return + * @throws Exception + */ + public SolrDocument queryOne(String coreName, String searchContent, String returneFields) { + SolrDocument result = null; + try { + HttpSolrClient client = getClient(coreName); + Map map = new HashMap(); + map.put(CommonParams.Q, searchContent); + map.put(CommonParams.FL, returneFields); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS, "1"); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, 
SolrRequest.METHOD.POST); + SolrDocumentList resp = query.getResults(); + if (CollectionUtils.isNotEmpty(resp)) { + return resp.get(0); + } + } catch (SolrServerException | IOException e) { + log.error("查询solr失败!,coreName:{},queryStr:{}",coreName , searchContent, e); + } + return result; + } + + + /** + * 查询 versionTree + * + * @param searchContent 检索内容 + * @return + * @throws Exception + */ + public VersionTree queryVersionTree(String searchContent) { + String returneFields = "proId,proName,versionName,downUrl,licenseType,dirTree"; + VersionTree results = null; + try { + HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE); + Map map = new HashMap(); + map.put(CommonParams.Q, searchContent); + map.put(CommonParams.FL, returneFields); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS, "1"); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, SolrRequest.METHOD.POST); + SolrDocumentList response = query.getResults(); + if (!response.isEmpty()) { + //转化对象 + results = BeanUtil.domToVersionTree(response.get(0)); + } + } catch (SolrServerException | IOException e) { + log.error("查询solr失败!,queryStr:{}" , searchContent, e); + } + return results; + } + + + /** + * 查询 versionTree + * + * @param versionId 版本ID + * @return + */ + public VersionTree queryVersionTreeByVersionId(String versionId) { + String returneFields = "proId,proName,versionName,downUrl,licenseType,dirTree"; + String queryStr = "versionId:"+ versionId; + VersionTree results = null; + try { + HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE); + Map map = new HashMap(); + map.put(CommonParams.Q, queryStr); + map.put(CommonParams.FL, returneFields); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS, "1"); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, SolrRequest.METHOD.POST); + SolrDocumentList response = query.getResults(); + //转化对象 + if (!response.isEmpty()){ + results = 
BeanUtil.domToVersionTree(response.get(0)); + }else { + log.error("根据版本ID查询VersionTree失败,versionId:{}" , versionId); + } + } catch (SolrServerException | IOException e) { + log.error("查询solr失败!,queryStr:{}" , queryStr, e); + } + return results; + } + + + /** + * 查询 version 的具体信息 + * + * @param versionId versionId + * @return + * @throws Exception + */ + public VersionTree queryVersionInfoByVersionId(Object versionId) { + String returneFields = "proId,proName,versionName,downUrl,licenseType"; + VersionTree result = new VersionTree(); + try { + HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE); + Map map = new HashMap(); + map.put(CommonParams.Q, "versionId:" + versionId); + map.put(CommonParams.FL, returneFields); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS, "1"); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, SolrRequest.METHOD.POST); + SolrDocumentList response = query.getResults(); + //转化对象 + if (CollectionUtils.isNotEmpty(response)) { + cn.hutool.core.bean.BeanUtil.copyProperties(response.get(0), result); + result.setLicenseType(response.get(0).get("licenseType") == null ? 
"" : response.get(0).get("licenseType").toString()); + }else { + log.error("根据版本ID查询版本信息失败,versionId:{}" , versionId); + } + } catch (SolrServerException | IOException e) { + log.error("查询solr失败!,queryStr:{}" , versionId, e); + } + return result; + } + + + /** + * 批量查询 version 的具体信息 + * + * @param versionIds versionIds + * @return + * @throws Exception + */ + public List queryBatchVersionInfoByVersionIds(Collection versionIds) { + List results = new ArrayList<>(); + if (CollectionUtils.isEmpty(versionIds)) { + return results; + } + //去一波重 + versionIds = versionIds.stream().collect(Collectors.toSet()); + String queryStr = "versionId:(" + StringUtils.join(versionIds, " OR ") + ")"; + String returneFields = "versionId,proId,proName,versionName,downUrl,licenseType"; + try { + HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE); + Map map = new HashMap(); + map.put(CommonParams.Q, queryStr); + map.put(CommonParams.FL, returneFields); + map.put(CommonParams.START, "0"); + map.put(CommonParams.ROWS,String.valueOf(versionIds.size())); + SolrParams params = new MapSolrParams(map); + QueryResponse query = client.query(params, SolrRequest.METHOD.POST); + SolrDocumentList response = query.getResults(); + //转化对象 + if (!response.isEmpty()) { + for (int i = 0; i < response.size(); i++) { + VersionTree versionTree = new VersionTree(); + try { + cn.hutool.core.bean.BeanUtil.copyProperties(response.get(i), versionTree); + versionTree.setLicenseType(response.get(i).get("licenseType") == null ? 
"" : response.get(i).get("licenseType").toString()); + results.add(versionTree); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + } catch (SolrServerException | IOException e) { + log.error("查询solr失败!,queryStr:{}" , queryStr, e); + } + return results; + } + +} diff --git a/src/main/java/com/keyware/composeanalysis/util/SpringContextUtils.java b/src/main/java/com/keyware/composeanalysis/util/SpringContextUtils.java new file mode 100644 index 0000000..036522f --- /dev/null +++ b/src/main/java/com/keyware/composeanalysis/util/SpringContextUtils.java @@ -0,0 +1,53 @@ +package com.keyware.composeanalysis.util; + +import org.springframework.beans.BeansException; +import org.springframework.context.ApplicationContext; +import org.springframework.context.ApplicationContextAware; +import org.springframework.stereotype.Component; + +@Component +public class SpringContextUtils implements ApplicationContextAware { + + /** + * 上下文对象实例 + */ + private static ApplicationContext applicationContext; + + @Override + public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { + SpringContextUtils.applicationContext = applicationContext; + } + + /** + * 获取applicationContext + */ + public static ApplicationContext getApplicationContext() { + //判断是否为null + if (applicationContext == null) { + throw new IllegalStateException("applicaitonContext未注入,请在applicationContext.xml中定义SpringContextHolder."); + } + return applicationContext; + } + + /** + * 通过name获取Bean + */ + public static Object getBean(String name) { + return getApplicationContext().getBean(name); + } + + /** + * 通过class获取Bean + */ + public static T getBean(Class clazz) { + return getApplicationContext().getBean(clazz); + } + + /** + * 通过name和class获取Bean + */ + public static T getBean(String name, Class clazz) { + return getApplicationContext().getBean(name, clazz); + } + +} diff --git a/src/main/resources/application.yaml b/src/main/resources/application.yaml new file mode 100644 
index 0000000..8f0706a --- /dev/null +++ b/src/main/resources/application.yaml @@ -0,0 +1,18 @@ +server: + port: 8001 + +spring: + application: + name: compose-analysis-service + cloud: + nacos: + discovery: + server-addr: 172.16.36.100:8848 + namespace: 7f9bb282-8ee3-4948-8182-24b7dcadcd5a + config: + server-addr: 172.16.36.100:8848 + namespace: 7f9bb282-8ee3-4948-8182-24b7dcadcd5a + group: dev_group + file-extension: yaml + config: + import: nacos:compose-analysis-dev.yaml diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..253e16c --- /dev/null +++ b/src/main/resources/logback-spring.xml @@ -0,0 +1,215 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + debug + + + UTF-8 + + ${CONSOLE_LOG_PATTERN} + + + + + + ${logPath}/debug/${appName}_debug.log + + + + ${logPath}/debug/${appName}_debug-%d{yyyy-MM-dd}.%i.log.gz + 128MB + ${maxHistory} + 10GB + + + ${logPattern} + utf-8 + + + DEBUG + ACCEPT + DENY + + + + + + ${logPath}/info/${appName}_info.log + + + + ${logPath}/info/${appName}_info-%d{yyyy-MM-dd}.%i.log.gz + 128MB + ${maxHistory} + 10GB + + + ${logPattern} + utf-8 + + + INFO + ACCEPT + DENY + + + + + + ${logPath}/warn/${appName}_warn.log + + + + ${logPath}/warn/${appName}_warn-%d{yyyy-MM-dd}.%i.log.gz + 128MB + ${maxHistory} + 10GB + + + ${logPattern} + utf-8 + + + WARN + ACCEPT + DENY + + + + + + ${logPath}/error/${appName}_error.log + + + + ${logPath}/error/${appName}_error-%d{yyyy-MM-dd}.%i.log.gz + 128MB + ${maxHistory} + 10GB + + + ${logPattern} + utf-8 + + + ERROR + ACCEPT + DENY + + + + + + + 0 + + ${queueSize} + + true + + + + + + + 0 + + ${queueSize} + + true + + + + + + + 0 + + ${queueSize} + + true + + + + + + + 0 + + ${queueSize} + + true + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/main/resources/mapper/AnalysisTaskService.xml b/src/main/resources/mapper/AnalysisTaskService.xml new 
file mode 100644 index 0000000..54f436e --- /dev/null +++ b/src/main/resources/mapper/AnalysisTaskService.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + +