first commit

master
liuzongren 7 months ago
commit 0aa44dcca4
  1. 33
      .gitignore
  2. 148
      pom.xml
  3. 21
      src/main/java/com/keyware/composeanalysis/ComposeAnalyzeApplication.java
  4. 20
      src/main/java/com/keyware/composeanalysis/config/AnalysisConfig.java
  5. 94
      src/main/java/com/keyware/composeanalysis/config/RedisConfig.java
  6. 31
      src/main/java/com/keyware/composeanalysis/config/RedissionConfig.java
  7. 25
      src/main/java/com/keyware/composeanalysis/config/SolrConfig.java
  8. 60
      src/main/java/com/keyware/composeanalysis/config/thread/TaskExecutePool.java
  9. 340
      src/main/java/com/keyware/composeanalysis/constant/FixedValue.java
  10. 26
      src/main/java/com/keyware/composeanalysis/constant/FunctionAndAnalysisAssemblyConst.java
  11. 69
      src/main/java/com/keyware/composeanalysis/constant/MongoDBConst.java
  12. 20
      src/main/java/com/keyware/composeanalysis/constant/RedisConst.java
  13. 33
      src/main/java/com/keyware/composeanalysis/constant/SolrDBConst.java
  14. 48
      src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisLevelEnum.java
  15. 48
      src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisStatusEnum.java
  16. 42
      src/main/java/com/keyware/composeanalysis/constant/enums/FileAnalysisStatusEnum.java
  17. 119
      src/main/java/com/keyware/composeanalysis/controller/ComposeAnalysisController.java
  18. 157
      src/main/java/com/keyware/composeanalysis/entity/AnalysisTask.java
  19. 18
      src/main/java/com/keyware/composeanalysis/mapper/AnalyzeTaskMapper.java
  20. 37
      src/main/java/com/keyware/composeanalysis/mongo/AnalysisLogMongoDto.java
  21. 96
      src/main/java/com/keyware/composeanalysis/mongo/AssemblyMongoDto.java
  22. 106
      src/main/java/com/keyware/composeanalysis/mongo/FileDataMongoDto.java
  23. 56
      src/main/java/com/keyware/composeanalysis/mongo/LineDataMongoDto.java
  24. 62
      src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java
  25. 58
      src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java
  26. 50
      src/main/java/com/keyware/composeanalysis/mongo/MatchOpenProjectMongoDto.java
  27. 75
      src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java
  28. 75
      src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java
  29. 75
      src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java
  30. 68
      src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java
  31. 56
      src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java
  32. 215
      src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java
  33. 56
      src/main/java/com/keyware/composeanalysis/solr/VersionTree.java
  34. 42
      src/main/java/com/keyware/composeanalysis/solr/VersionTreeNode.java
  35. 45
      src/main/java/com/keyware/composeanalysis/task/AnalysisTaskFactory.java
  36. 356
      src/main/java/com/keyware/composeanalysis/task/CodeBlockAnalysisTask.java
  37. 232
      src/main/java/com/keyware/composeanalysis/task/FileAnalysisTask.java
  38. 409
      src/main/java/com/keyware/composeanalysis/task/FunctionAnalysisTask.java
  39. 10
      src/main/java/com/keyware/composeanalysis/task/IAnalysisTask.java
  40. 298
      src/main/java/com/keyware/composeanalysis/task/LineAnalysisTask.java
  41. 378
      src/main/java/com/keyware/composeanalysis/task/PorjectAnalysisTask.java
  42. 33
      src/main/java/com/keyware/composeanalysis/util/AnalysisLogUtil.java
  43. 32
      src/main/java/com/keyware/composeanalysis/util/BeanUtil.java
  44. 38
      src/main/java/com/keyware/composeanalysis/util/ConvertUtil.java
  45. 23
      src/main/java/com/keyware/composeanalysis/util/IpUtil.java
  46. 537
      src/main/java/com/keyware/composeanalysis/util/RedisUtil.java
  47. 206
      src/main/java/com/keyware/composeanalysis/util/SimilarityUtil.java
  48. 321
      src/main/java/com/keyware/composeanalysis/util/SolrUtils.java
  49. 53
      src/main/java/com/keyware/composeanalysis/util/SpringContextUtils.java
  50. 18
      src/main/resources/application.yaml
  51. 215
      src/main/resources/logback-spring.xml
  52. 25
      src/main/resources/mapper/AnalysisTaskService.xml

33
.gitignore

@@ -0,0 +1,33 @@
HELP.md
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/

148
pom.xml

@@ -0,0 +1,148 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.keyware</groupId>
<artifactId>keyware-cloud</artifactId>
<version>1.0.0</version>
</parent>
<groupId>com.keyware</groupId>
<artifactId>compose-analysis</artifactId>
<version>1.0.0</version>
<name>compose-analysis</name>
<description>compose-analysis</description>
<properties>
<java.version>17</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Nacos service registration and discovery -->
<dependency>
<groupId>com.alibaba.cloud</groupId>
<artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
</dependency>
<!-- Nacos config center -->
<dependency>
<groupId>com.alibaba.cloud</groupId>
<artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>
</dependency>
<!-- mongodb -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-mongodb</artifactId>
</dependency>
<!-- Redis integration -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- Redisson integration -->
<dependency>
<groupId>org.redisson</groupId>
<artifactId>redisson-spring-boot-starter</artifactId>
<version>3.13.6</version>
</dependency>
<!-- Solr integration -->
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-solrj</artifactId>
<version>7.6.0</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Function parser -->
<dependency>
<groupId>com.keyware</groupId>
<artifactId>keyswan-analysis</artifactId>
<version>releases-1.1.5</version>
</dependency>
<dependency>
<groupId>com.keyware</groupId>
<artifactId>keyswan-function</artifactId>
<version>release-1.1.2</version>
</dependency>
<!-- Shared common dependencies -->
<dependency>
<groupId>com.keyware</groupId>
<artifactId>keyware-common</artifactId>
<version>1.0.0</version>
</dependency>
<!-- API interface -->
<dependency>
<groupId>com.keyware</groupId>
<artifactId>compose-analysis-api</artifactId>
<version>1.0.0</version>
</dependency>
</dependencies>
<repositories>
<repository>
<id>keyware-repos</id>
<name>KeyWare Repository</name>
<url>http://218.30.67.85:19201/nexus/content/groups/public/</url>
</repository>
<repository>
<id>keyware-repos-2</id>
<name>KeyWare Repository-2</name>
<url>http://218.30.67.85:19201/nexus/content/repositories/releases/</url>
</repository>
<!-- <repository>-->
<!-- <id>aliyun-repository</id>-->
<!-- <name>aliyun repository</name>-->
<!-- <url>https://maven.aliyun.com/repository/public/</url>-->
<!-- </repository>-->
<!-- <repository>-->
<!-- <id>aliyun-repos</id>-->
<!-- <name>Aliyun Repository</name>-->
<!-- <url>http://maven.aliyun.com/nexus/content/groups/public</url>-->
<!-- <releases>-->
<!-- <enabled>true</enabled>-->
<!-- </releases>-->
<!-- <snapshots>-->
<!-- <enabled>false</enabled>-->
<!-- </snapshots>-->
<!-- </repository>-->
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>3.2.7</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>17</source>
<target>17</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

21
src/main/java/com/keyware/composeanalysis/ComposeAnalyzeApplication.java

@@ -0,0 +1,21 @@
package com.keyware.composeanalysis;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cloud.client.discovery.EnableDiscoveryClient;
import org.springframework.cloud.context.config.annotation.RefreshScope;
import org.springframework.scheduling.annotation.EnableAsync;
@MapperScan("com.keyware.composeanalysis.mapper")
@SpringBootApplication
@EnableDiscoveryClient
@RefreshScope
@EnableAsync
public class ComposeAnalyzeApplication {
public static void main(String[] args) {
SpringApplication.run(ComposeAnalyzeApplication.class, args);
}
}

20
src/main/java/com/keyware/composeanalysis/config/AnalysisConfig.java

@@ -0,0 +1,20 @@
package com.keyware.composeanalysis.config;
import lombok.Data;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;
/**
* @author liuzongren
* @date 2024/7/25
* @description Analysis configuration class
*/
@Configuration
@Data
public class AnalysisConfig {
//Path where source code is uploaded and decompressed
@Value("${codeResourcePath}")
private String codeResourcePath;
}
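Editor's note: a minimal sketch of how this bean could be consumed; the UploadPathResolver class and its resolve helper are hypothetical, not part of this commit.

import jakarta.annotation.Resource;
import org.springframework.stereotype.Service;
import java.nio.file.Path;
import java.nio.file.Paths;
// Hypothetical consumer of AnalysisConfig
@Service
public class UploadPathResolver {
@Resource
private AnalysisConfig analysisConfig;
// Resolve a task's working directory under the configured upload/decompression root
public Path resolve(String taskId) {
return Paths.get(analysisConfig.getCodeResourcePath(), taskId);
}
}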

94
src/main/java/com/keyware/composeanalysis/config/RedisConfig.java

@@ -0,0 +1,94 @@
package com.keyware.composeanalysis.config;
import org.redisson.Redisson;
import org.redisson.api.RedissonClient;
import org.redisson.codec.JsonJacksonCodec;
import org.redisson.config.Config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.redis.connection.RedisConnectionFactory;
import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
import org.springframework.data.redis.connection.lettuce.LettuceConnectionFactory;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer;
import org.springframework.data.redis.serializer.StringRedisSerializer;
@Configuration
public class RedisConfig {
@Value("${spring.data.redis.host}")
private String redisHost;
@Value("${spring.data.redis.port}")
private int redisPort;
//Password from the config file; it has already been unescaped by this point
@Value("${spring.data.redis.password}")
private String redisPassword;
@Bean(name = "redisTemplate")
public RedisTemplate<String, Object> getRedisTemplate(RedisConnectionFactory factory) {
RedisTemplate<String, Object> template = new RedisTemplate<String, Object>();
template.setConnectionFactory(factory);
//Configure serialization
Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class);
StringRedisSerializer stringRedisSerializer = new StringRedisSerializer();
//Keys use String serialization
template.setKeySerializer(stringRedisSerializer);
//Hash keys use JSON serialization
template.setHashKeySerializer(jackson2JsonRedisSerializer);
//Values use JSON serialization
template.setValueSerializer(jackson2JsonRedisSerializer);
template.afterPropertiesSet();
return template;
}
@Bean
public RedissonClient getRedisson() {
Config config = new Config();
config.useSingleServer().
setAddress("redis://" + redisHost + ":" + redisPort).
setPassword(redisPassword);
config.setCodec(new JsonJacksonCodec());
return Redisson.create(config);
}
@Bean(name = "oneDBRedisTemplateClient")
public RedisTemplate<String, Object> redisTemplate() {
//For development convenience, <String, Object> is generally used directly
RedisTemplate<String, Object> template = new RedisTemplate<>();
template.setConnectionFactory(redisConnection(1));
//Configure serialization
Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class);
StringRedisSerializer stringRedisSerializer = new StringRedisSerializer();
//Keys use String serialization
template.setKeySerializer(stringRedisSerializer);
//Hash keys use JSON serialization
template.setHashKeySerializer(jackson2JsonRedisSerializer);
//Values use JSON serialization
template.setValueSerializer(jackson2JsonRedisSerializer);
template.afterPropertiesSet();
return template;
}
private LettuceConnectionFactory redisConnection(int db) {
RedisStandaloneConfiguration server = new RedisStandaloneConfiguration();
server.setHostName(redisHost);
server.setDatabase(db);
server.setPort(redisPort);
server.setPassword(redisPassword);
LettuceConnectionFactory factory = new LettuceConnectionFactory(server);
factory.afterPropertiesSet();
return factory;
}
}
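Editor's note: two RedisTemplate beans are registered above, so injection points must pick one by bean name. A minimal sketch, with a hypothetical consumer class:

import jakarta.annotation.Resource;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Component;
// Hypothetical consumer disambiguating the two templates by name
@Component
public class CacheAccessor {
@Resource(name = "redisTemplate")
private RedisTemplate<String, Object> defaultTemplate; // default database
@Resource(name = "oneDBRedisTemplateClient")
private RedisTemplate<String, Object> db1Template; // Redis database 1
public void put(String key, Object value) {
db1Template.opsForValue().set(key, value);
}
}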

31
src/main/java/com/keyware/composeanalysis/config/RedissionConfig.java

@@ -0,0 +1,31 @@
package com.keyware.composeanalysis.config;
import org.redisson.Redisson;
import org.redisson.api.RBucket;
import org.redisson.api.RedissonClient;
import org.redisson.codec.JsonJacksonCodec;
import org.redisson.config.Config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
* @author liuzongren
* @date 2024/7/30
*/
@Configuration
public class RedissionConfig {
@Value("${spring.data.redis.host}")
private String redisHost;
@Value("${spring.data.redis.port}")
private int redisPort;
@Value("${spring.data.redis.password}")
private String redisPassword;
}

25
src/main/java/com/keyware/composeanalysis/config/SolrConfig.java

@@ -0,0 +1,25 @@
package com.keyware.composeanalysis.config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
* @author liuzongren
* @date 2024/7/24
* @description Solr configuration properties
*/
@Component
@ConfigurationProperties(prefix = "solr")
public class SolrConfig {
/**
* Maximum number of rows returned per query
*/
@Value("${solr.row:5}")
private String ROWS;
@Value("${solr.solrUrl}")
private String solrUrl;
}

60
src/main/java/com/keyware/composeanalysis/config/thread/TaskExecutePool.java

@@ -0,0 +1,60 @@
package com.keyware.composeanalysis.config.thread;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
/**
* Thread pool configuration class
*/
@Configuration
public class TaskExecutePool {
/**
 * Core pool size
 */
private int coreThreadsSize = 10;
/**
 * Maximum pool size
 */
private int maxThreadsSize = 50;
/**
 * Keep-alive time in seconds
 */
private int keepAliveSeconds = 60;
/**
 * Queue capacity
 */
private int queueCapacity = 10000;
@Bean
public Executor taskExecutor() {
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
//Core pool size
executor.setCorePoolSize(coreThreadsSize);
//Maximum pool size
executor.setMaxPoolSize(maxThreadsSize);
//Queue capacity
executor.setQueueCapacity(queueCapacity);
//Keep-alive time
executor.setKeepAliveSeconds(keepAliveSeconds);
//Thread name prefix
executor.setThreadNamePrefix("ComposeAnalysisExecutePool-");
// setRejectedExecutionHandler: how to handle new tasks once the pool has reached max size
// CallerRunsPolicy: run the task on the caller's thread instead of a new one
executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
// Wait for all tasks to finish before shutting down the pool
executor.setWaitForTasksToCompleteOnShutdown(true);
executor.initialize();
return executor;
}
}
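Editor's note: a minimal submission sketch against this pool; the task body is invented. Because the rejection policy is CallerRunsPolicy, work submitted beyond the 50 threads plus the 10,000-slot queue runs synchronously on the submitting thread instead of being dropped.

import jakarta.annotation.Resource;
import org.springframework.stereotype.Component;
import java.util.concurrent.Executor;
// Hypothetical submitter, not part of this commit
@Component
public class PoolDemo {
@Resource
private Executor taskExecutor;
public void submit() {
//Overflow past the queue capacity falls back to the caller's thread
taskExecutor.execute(() -> System.out.println("analysis work"));
}
}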

340
src/main/java/com/keyware/composeanalysis/constant/FixedValue.java

@@ -0,0 +1,340 @@
package com.keyware.composeanalysis.constant;
import java.util.*;
/**
* Cached lookup constants for common languages
*/
public class FixedValue {
//Compressed/archive file formats
public final static Set<String> COMPRESSED_FORMAT = new HashSet<String>();
static {
COMPRESSED_FORMAT.add(".exe");
COMPRESSED_FORMAT.add(".msi");
COMPRESSED_FORMAT.add(".zip");
COMPRESSED_FORMAT.add(".rar");
COMPRESSED_FORMAT.add(".tar");
COMPRESSED_FORMAT.add(".jar");
COMPRESSED_FORMAT.add(".war");
COMPRESSED_FORMAT.add(".tar.gz");
COMPRESSED_FORMAT.add(".gz");
COMPRESSED_FORMAT.add(".tar.bz2");
COMPRESSED_FORMAT.add(".bz2");
COMPRESSED_FORMAT.add(".tar.z");
COMPRESSED_FORMAT.add(".z");
COMPRESSED_FORMAT.add(".tgz");
COMPRESSED_FORMAT.add(".7z");
COMPRESSED_FORMAT.add(".xz");
}
//Analysis result types
//Detection results (obligations) for each license
public final static Map<String, String> LINCEN_RESULT = new HashMap<String, String>();
static {
LINCEN_RESULT.put("BSD", "1. If the redistributed product contains source code, the source code must retain the BSD license of the original code.\n" +
"2. If only a binary library/software is redistributed, its documentation and copyright notice must include the BSD license of the original code.\n" +
"3. The names of the original authors/organizations and of the original product may not be used for marketing.");
LINCEN_RESULT.put("Apache License","1. Users of the code must be given a copy of the Apache Licence.\n" +
"2. If you modify the code, the modified files must carry a notice saying so.\n" +
"3. Derived code (modified code and code derived from the source) must retain the original license, trademark and patent notices, and any other attribution the original authors require.\n" +
"4. If the redistributed product contains a NOTICE file, the NOTICE file must include the Apache Licence. You may add your own attributions to the NOTICE, but not in a way that amounts to altering the Apache Licence.");
LINCEN_RESULT.put("GNU General Public License","Modified and derived code may not be released or sold as closed-source commercial software");
LINCEN_RESULT.put("GNU Lesser General Public License","May be referenced as a library by commercial software and released and sold");
LINCEN_RESULT.put("MIT","You must include the original license notice in your distribution");
}
//File suffix to language name
public final static Map<String, String> SUFFIX_LANG = new HashMap<String, String>();
static {
SUFFIX_LANG.put(".java", "Java");
SUFFIX_LANG.put(".c", "C");
SUFFIX_LANG.put(".h", "C");
SUFFIX_LANG.put(".cpp", "Cpp");//C++
//add by 2022/07/13
SUFFIX_LANG.put(".hpp", "Cpp");//C++
SUFFIX_LANG.put(".cs", "Cs");//C#
SUFFIX_LANG.put(".m", "OC");//Objective-C
//add by 2022/07/13
SUFFIX_LANG.put(".mm", "OC");//Objective-C
SUFFIX_LANG.put(".py", "Python");
SUFFIX_LANG.put(".go", "Golang");
SUFFIX_LANG.put(".pl", "Perl");
SUFFIX_LANG.put(".rb", "Ruby");
SUFFIX_LANG.put(".php", "PHP");
SUFFIX_LANG.put(".sql", "Plsql");//PL/SQL
SUFFIX_LANG.put(".abap", "Abap");
SUFFIX_LANG.put(".lua", "Lua");
SUFFIX_LANG.put(".erl", "Erlang");
SUFFIX_LANG.put(".swift", "Swift");
SUFFIX_LANG.put(".groovy", "Groovy");
SUFFIX_LANG.put(".frm", "VB");//VB.net
SUFFIX_LANG.put(".bas", "VB");
SUFFIX_LANG.put(".cls", "VB");
SUFFIX_LANG.put(".ctl", "VB");
SUFFIX_LANG.put(".vb", "VB");
SUFFIX_LANG.put(".vbs", "VB");
SUFFIX_LANG.put(".pp", "Puppet");
SUFFIX_LANG.put(".clj", "Clojure");
SUFFIX_LANG.put(".fs", "F");//F#
SUFFIX_LANG.put(".fsx", "F");//F#
SUFFIX_LANG.put(".fsscript", "F");//F#
SUFFIX_LANG.put(".hs", "Haskell");
SUFFIX_LANG.put(".js", "Javascript");
SUFFIX_LANG.put(".ts", "Typescript");
SUFFIX_LANG.put(".r", "R");
//add by 2022/07/13
SUFFIX_LANG.put(".R", "R");
SUFFIX_LANG.put(".sc", "Scala");
SUFFIX_LANG.put(".scala", "Scala");
//add by 2022/07/13
SUFFIX_LANG.put(".pas", "Pascal");
SUFFIX_LANG.put(".cob", "Cobol");
SUFFIX_LANG.put(".as", "ActionScript");
SUFFIX_LANG.put(".rs", "Rust");
SUFFIX_LANG.put(".ino", "Arduino");
SUFFIX_LANG.put(".asm", "Assembly");
SUFFIX_LANG.put(".f", "Fortran");
SUFFIX_LANG.put(".f90", "Fortran");
SUFFIX_LANG.put(".sh", "Shell");
SUFFIX_LANG.put(".html", "Html");
SUFFIX_LANG.put(".htm", "Html");
SUFFIX_LANG.put(".css", "Css");
SUFFIX_LANG.put(".rpg", "Rpg");
SUFFIX_LANG.put(".xml", "Xml");
SUFFIX_LANG.put(".pli", "Pli");//PL/I
SUFFIX_LANG.put(".p","OpenEdge");//OpenEdge
SUFFIX_LANG.put(".abl","OpenEdge");//OpenEdge
//SUFFIX_LANG.put(".jar", "Jar");
//SUFFIX_LANG.put(".war", "War");
}
//File suffix to language name (lowercase variant)
public final static Map<String, String> SUFFIX_LANG1 = new HashMap<String, String>();
static {
SUFFIX_LANG1.put(".java", "java");
SUFFIX_LANG1.put(".c", "c");
SUFFIX_LANG1.put(".h", "c");
SUFFIX_LANG1.put(".cpp", "c++");//C++
//add by 2022/07/13
SUFFIX_LANG1.put(".hpp", "c++");//C++
SUFFIX_LANG1.put(".cs", "c#");//C#
SUFFIX_LANG1.put(".m", "Objective-C");//Objective-C
//add by 2022/07/13
SUFFIX_LANG1.put(".mm", "Objective-C");//Objective-C
SUFFIX_LANG1.put(".py", "python");
SUFFIX_LANG1.put(".go", "go");
SUFFIX_LANG1.put(".pl", "perl");
SUFFIX_LANG1.put(".rb", "ruby");
SUFFIX_LANG1.put(".php", "php");
SUFFIX_LANG1.put(".sql", "plsql");//PL/SQL
SUFFIX_LANG1.put(".abap", "abap");
SUFFIX_LANG1.put(".lua", "lua");
SUFFIX_LANG1.put(".erl", "erlang");
SUFFIX_LANG1.put(".swift", "swift");
SUFFIX_LANG1.put(".groovy", "groovy");
SUFFIX_LANG1.put(".frm", "vb");//VB.net
SUFFIX_LANG1.put(".bas", "vb");
SUFFIX_LANG1.put(".cls", "vb");
SUFFIX_LANG1.put(".ctl", "vb");
SUFFIX_LANG1.put(".vb", "vb");
SUFFIX_LANG1.put(".vbs", "vb");
SUFFIX_LANG1.put(".pp", "puppet");
SUFFIX_LANG1.put(".clj", "clojure");
SUFFIX_LANG1.put(".fs", "f");//F#
SUFFIX_LANG1.put(".fsx", "f");//F#
SUFFIX_LANG1.put(".fsscript", "f");//F#
SUFFIX_LANG1.put(".hs", "haskell");
SUFFIX_LANG1.put(".js", "javaScript");
SUFFIX_LANG1.put(".ts", "typeScript");
SUFFIX_LANG1.put(".r", "r");
//add by 2022/07/13
SUFFIX_LANG1.put(".R", "r");
SUFFIX_LANG1.put(".sc", "scala");
SUFFIX_LANG1.put(".scala", "scala");
//add by 2022/07/13
SUFFIX_LANG1.put(".pas", "pascal");
SUFFIX_LANG1.put(".cob", "cobol");
SUFFIX_LANG1.put(".as", "actionScript");
SUFFIX_LANG1.put(".rs", "rust");
SUFFIX_LANG1.put(".ino", "arduino");
SUFFIX_LANG1.put(".asm", "assembly");
SUFFIX_LANG1.put(".f", "fortran");
SUFFIX_LANG1.put(".f90", "fortran");
SUFFIX_LANG1.put(".sh", "shell");
SUFFIX_LANG1.put(".html", "html");
SUFFIX_LANG1.put(".htm", "html");
SUFFIX_LANG1.put(".css", "css");
SUFFIX_LANG1.put(".rpg", "rpg");
SUFFIX_LANG1.put(".xml", "xml");
SUFFIX_LANG1.put(".pli", "pli");//PL/I
SUFFIX_LANG1.put(".p","OpenEdge");//OpenEdge
SUFFIX_LANG1.put(".abl","OpenEdge");//OpenEdge
//SUFFIX_LANG.put(".jar", "Jar");
//SUFFIX_LANG.put(".war", "War");
}
//Solr feature-library (cut-file) core name for each file suffix
public final static Map<String, String> SUFFIX_SOLR_FILE = new HashMap<String, String>();
static {
SUFFIX_SOLR_FILE.put("java", "Java_CutFileInfo");
SUFFIX_SOLR_FILE.put("c", "C_CutFileInfo");
SUFFIX_SOLR_FILE.put("h", "C_CutFileInfo");
SUFFIX_SOLR_FILE.put("cpp", "Cpp_CutFileInfo");//C++
SUFFIX_SOLR_FILE.put("hpp", "Cpp_CutFileInfo");//C++
SUFFIX_SOLR_FILE.put("cs", "Cs_CutFileInfo");//C#
SUFFIX_SOLR_FILE.put("m", "OC_CutFileInfo");//Objective-C
SUFFIX_SOLR_FILE.put("mm", "OC_CutFileInfo");//Objective-C
SUFFIX_SOLR_FILE.put("py", "Python_CutFileInfo");
SUFFIX_SOLR_FILE.put("go", "Golang_CutFileInfo");
SUFFIX_SOLR_FILE.put("pl", "Perl_CutFileInfo");
SUFFIX_SOLR_FILE.put("rb", "Ruby_CutFileInfo");
SUFFIX_SOLR_FILE.put("php", "PHP_CutFileInfo");
SUFFIX_SOLR_FILE.put("sql", "Plsql_CutFileInfo");//PL/SQL
SUFFIX_SOLR_FILE.put("abap", "Abap_CutFileInfo");
SUFFIX_SOLR_FILE.put("lua", "Lua_CutFileInfo");
SUFFIX_SOLR_FILE.put("erl", "Erlang_CutFileInfo");
SUFFIX_SOLR_FILE.put("swift", "Swift_CutFileInfo");
SUFFIX_SOLR_FILE.put("groovy", "Groovy_CutFileInfo");
SUFFIX_SOLR_FILE.put("frm", "VB_CutFileInfo");//VB.net
SUFFIX_SOLR_FILE.put("bas", "VB_CutFileInfo");
SUFFIX_SOLR_FILE.put("cls", "VB_CutFileInfo");
SUFFIX_SOLR_FILE.put("ctl", "VB_CutFileInfo");
SUFFIX_SOLR_FILE.put("vb", "VB_CutFileInfo");
SUFFIX_SOLR_FILE.put("vbs", "VB_CutFileInfo");
SUFFIX_SOLR_FILE.put("pp", "Puppet_CutFileInfo");
SUFFIX_SOLR_FILE.put("clj", "Clojure_CutFileInfo");
SUFFIX_SOLR_FILE.put("fs", "F_CutFileInfo");//F#
SUFFIX_SOLR_FILE.put("fsx", "F_CutFileInfo");//F#
SUFFIX_SOLR_FILE.put("fsscript", "F_CutFileInfo");//F#
SUFFIX_SOLR_FILE.put("hs", "Haskell_CutFileInfo");
SUFFIX_SOLR_FILE.put("js", "Javascript_CutFileInfo");
SUFFIX_SOLR_FILE.put("ts", "Typescript_CutFileInfo");
SUFFIX_SOLR_FILE.put("r", "R_CutFileInfo");
SUFFIX_SOLR_FILE.put("R", "R_CutFileInfo");
SUFFIX_SOLR_FILE.put("sc", "Scala_CutFileInfo");
SUFFIX_SOLR_FILE.put("scala", "Scala_CutFileInfo");
SUFFIX_SOLR_FILE.put("pas", "Pascal_CutFileInfo");
SUFFIX_SOLR_FILE.put("cob", "Cobol_CutFileInfo");
SUFFIX_SOLR_FILE.put("as", "ActionScript_CutFileInfo");
SUFFIX_SOLR_FILE.put("rs", "Rust_CutFileInfo");
SUFFIX_SOLR_FILE.put("ino", "Arduino_CutFileInfo");
SUFFIX_SOLR_FILE.put("asm", "Assembly_CutFileInfo");
SUFFIX_SOLR_FILE.put("f", "Fortran_CutFileInfo");
SUFFIX_SOLR_FILE.put("f90", "Fortran_CutFileInfo");
SUFFIX_SOLR_FILE.put("sh", "Shell_CutFileInfo");
SUFFIX_SOLR_FILE.put("html", "Html_CutFileInfo");
SUFFIX_SOLR_FILE.put("htm", "Html_CutFileInfo");
SUFFIX_SOLR_FILE.put("css", "Css_CutFileInfo");
SUFFIX_SOLR_FILE.put("rpg", "Rpg_CutFileInfo");
SUFFIX_SOLR_FILE.put("xml", "Xml_CutFileInfo");
SUFFIX_SOLR_FILE.put("pli", "Pli_CutFileInfo");//PL/I
}
//Solr source-file core name for each file suffix
public final static Map<String, String> SUFFIX_SOLR_VERSION = new HashMap<String, String>();
static {
SUFFIX_SOLR_VERSION.put("java", "Java_SourceFileBase");
SUFFIX_SOLR_VERSION.put("c", "C_SourceFileBase");
SUFFIX_SOLR_VERSION.put("h", "C_SourceFileBase");
SUFFIX_SOLR_VERSION.put("cpp", "Cpp_SourceFileBase");//C++
SUFFIX_SOLR_VERSION.put("hpp", "Cpp_SourceFileBase");//C++
SUFFIX_SOLR_VERSION.put("cs", "Cs_SourceFileBase");//C#
SUFFIX_SOLR_VERSION.put("m", "OC_SourceFileBase");//Objective-C
SUFFIX_SOLR_VERSION.put("mm", "OC_SourceFileBase");//Objective-C
SUFFIX_SOLR_VERSION.put("py", "Python_SourceFileBase");
SUFFIX_SOLR_VERSION.put("go", "Golang_SourceFileBase");
SUFFIX_SOLR_VERSION.put("pl", "Perl_SourceFileBase");
SUFFIX_SOLR_VERSION.put("rb", "Ruby_SourceFileBase");
SUFFIX_SOLR_VERSION.put("php", "PHP_SourceFileBase");
SUFFIX_SOLR_VERSION.put("sql", "Plsql_SourceFileBase");//PL/SQL
SUFFIX_SOLR_VERSION.put("abap", "Abap_SourceFileBase");
SUFFIX_SOLR_VERSION.put("lua", "Lua_SourceFileBase");
SUFFIX_SOLR_VERSION.put("erl", "Erlang_SourceFileBase");
SUFFIX_SOLR_VERSION.put("swift", "Swift_SourceFileBase");
SUFFIX_SOLR_VERSION.put("groovy", "Groovy_SourceFileBase");
SUFFIX_SOLR_VERSION.put("frm", "VB_SourceFileBase");//VB.net
SUFFIX_SOLR_VERSION.put("bas", "VB_SourceFileBase");
SUFFIX_SOLR_VERSION.put("cls", "VB_SourceFileBase");
SUFFIX_SOLR_VERSION.put("ctl", "VB_SourceFileBase");
SUFFIX_SOLR_VERSION.put("vb", "VB_SourceFileBase");
SUFFIX_SOLR_VERSION.put("vbs", "VB_SourceFileBase");
SUFFIX_SOLR_VERSION.put("pp", "Puppet_SourceFileBase");
SUFFIX_SOLR_VERSION.put("clj", "Clojure_SourceFileBase");
SUFFIX_SOLR_VERSION.put("fs", "F_SourceFileBase");//F#
SUFFIX_SOLR_VERSION.put("fsx", "F_SourceFileBase");//F#
SUFFIX_SOLR_VERSION.put("fsscript", "F_SourceFileBase");//F#
SUFFIX_SOLR_VERSION.put("hs", "Haskell_SourceFileBase");
SUFFIX_SOLR_VERSION.put("js", "Javascript_SourceFileBase");
SUFFIX_SOLR_VERSION.put("ts", "Typescript_SourceFileBase");
SUFFIX_SOLR_VERSION.put("r", "R_SourceFileBase");
SUFFIX_SOLR_VERSION.put("R", "R_SourceFileBase");
SUFFIX_SOLR_VERSION.put("sc", "Scala_SourceFileBase");
SUFFIX_SOLR_VERSION.put("scala", "Scala_SourceFileBase");
SUFFIX_SOLR_VERSION.put("pas", "Pascal_SourceFileBase");
SUFFIX_SOLR_VERSION.put("cob", "Cobol_SourceFileBase");
SUFFIX_SOLR_VERSION.put("as", "ActionScript_SourceFileBase");
SUFFIX_SOLR_VERSION.put("rs", "Rust_SourceFileBase");
SUFFIX_SOLR_VERSION.put("ino", "Arduino_SourceFileBase");
SUFFIX_SOLR_VERSION.put("asm", "Assembly_SourceFileBase");
SUFFIX_SOLR_VERSION.put("f", "Fortran_SourceFileBase");
SUFFIX_SOLR_VERSION.put("f90", "Fortran_SourceFileBase");
SUFFIX_SOLR_VERSION.put("sh", "Shell_SourceFileBase");
SUFFIX_SOLR_VERSION.put("html", "Html_SourceFileBase");
SUFFIX_SOLR_VERSION.put("htm", "Html_SourceFileBase");
SUFFIX_SOLR_VERSION.put("css", "Css_SourceFileBase");
SUFFIX_SOLR_VERSION.put("rpg", "Rpg_SourceFileBase");
SUFFIX_SOLR_VERSION.put("xml", "Xml_SourceFileBase");
SUFFIX_SOLR_VERSION.put("pli", "Pli_SourceFileBase");//PL/I
// SUFFIX_SOLR_VERSION.put(".class", "mavenBinaryVersionTree");//class二进制文件
// SUFFIX_SOLR_VERSION.put(".jar", "mavenBinaryVersionTree");//class二进制文件
// SUFFIX_SOLR_VERSION.put(".dll", "nugetBinaryVersionTree");//dll二进制文件
}
public final static Map<String, String> SUFFIX_BIN = new HashMap<String, String>();
static {
SUFFIX_BIN.put(".class", "java");
SUFFIX_BIN.put(".dll", "c++");
}
public final static Map<String, String> SUFFIX_BIN_SOLR = new HashMap<String, String>();
static {
SUFFIX_BIN_SOLR.put(".class", "mavenBinaryVersionTree");
SUFFIX_BIN_SOLR.put(".dll", "nugetBinaryVersionTree");
}
public final static Map<String, String> SUFFIX_TFILE = new HashMap<String, String>();
static {
SUFFIX_TFILE.put(".c", "C");
SUFFIX_TFILE.put(".cc", "C++");
SUFFIX_TFILE.put(".cpp", "C++");
SUFFIX_TFILE.put(".cs", "C#");
}
public final static Map<String, Integer> CVE_LEVE = new HashMap<String, Integer>();
static {
CVE_LEVE.put("HIGH", 2);
CVE_LEVE.put("MEDIUM", 1);
CVE_LEVE.put("LOW", 0);
}
//Maximum number of analyses the system may run at once
public static int MAX_ANALYSIS_SIZE = 0;
//Maximum number of tasks allowed to wait
public static int MAX_WAIT_SIZE = 0;
//Number of tasks currently running
public static int IN_PROGRESS_NUM = 0;
}

26
src/main/java/com/keyware/composeanalysis/constant/FunctionAndAnalysisAssemblyConst.java

@@ -0,0 +1,26 @@
package com.keyware.composeanalysis.constant;
/**
* @Author liuzongren
* @Date 2024/7/24
 * @Description Common constants for the function analysis component
**/
public interface FunctionAndAnalysisAssemblyConst {
/**
* analysis component: line-level feature extraction
*/
String LINE_EXTRACT= "2";
/**
* analysis component: line-level feature extraction, rolling over every 6 lines
*/
String LINE_EXTRACT_BY_6_LINE = "1";
}

69
src/main/java/com/keyware/composeanalysis/constant/MongoDBConst.java

@@ -0,0 +1,69 @@
package com.keyware.composeanalysis.constant;
/**
 * @Author liuzongren
 * @Description MongoDB database constants
 * @Date 2024/7/23
 **/
public interface MongoDBConst {
/**
 * Name of the KEYSWAN MongoDB database
 */
String DB_NAME_KEYSWAN = "KEYSWAN";
/**
 * Database name prefix for each task's MongoDB database
 */
String DB_NAME_PREFIX = "keyswan_task_";
/**
 * VERSIONBASEDATA collection; stores MD5 and related info of open-source projects
 */
String TABLE_NAME_VERSIONBASEDATA = "VERSIONBASEDATA";
//Source-file collection for languages outside the 32 supported ones
String TABLE_NAME_SOURCE_FILE_BASE = "Other_SourceFileBase";
//Version tree collection
String VERSION_TREE = "versionTree";
/**
 * file_data stores per-file analysis information
 */
String TABLE_NAME_FILE_DATA = "file_data";
/**
 * match_open_file stores information about matched open-source projects
 */
String TABLE_NAME_MATCH_OPEN_FILE = "match_open_file";
//line_data collection
String DB_TABLE_NAME_LINE_DATA = "line_data";
// isAnalyze field in file_data: analysis status
String ANALYSIS_STATUS = "isAnalyze";
// isParent field in file_data: directory flag
String IS_DIR = "isParent";
// assFlag field in file_data: whether component analysis has finished
String ASS_FLAG = "assFlag";
// isSelect in file_data: 0 = decompressed, nothing extracted yet; 1 = file-level feature extraction; 2 = line-level feature extraction; 3 = composition-analysis feature extraction
// isSelect in match_open_file: 0 = initial state, 1 = already queried
// line_hay: 0 = entering line-level feature extraction
String IS_SELECT = "isSelect";
}

20
src/main/java/com/keyware/composeanalysis/constant/RedisConst.java

@@ -0,0 +1,20 @@
package com.keyware.composeanalysis.constant;
/**
* @author liuzongren
* @date 2024/7/31
* @description Redis key constant pool
*/
public interface RedisConst {
/**
* 分析任务全局分布式锁前缀
*/
String TASK_LOCK_KEY_PREFIX = "ANALYSIS_TASK_LOCK_ID_%s";
/**
* 分析任务运行状态前缀
*/
String TASK_RUNNING_STATUS_KEY_PREFIX = "ANALYSIS_TASK_RUNNING_STATUS_%s";
}
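Editor's note: a quick sketch of expanding these %s templates at a call site, mirroring how the controller below builds its lock key; the task id is invented.

String lockKey = String.format(RedisConst.TASK_LOCK_KEY_PREFIX, "task-123");
// -> "ANALYSIS_TASK_LOCK_ID_task-123"
String statusKey = String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, "task-123");
// -> "ANALYSIS_TASK_RUNNING_STATUS_task-123"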

33
src/main/java/com/keyware/composeanalysis/constant/SolrDBConst.java

@@ -0,0 +1,33 @@
package com.keyware.composeanalysis.constant;
/**
* @Author liuzongren
 * @Description Solr database constants
 * @Date 2024/7/24
**/
public interface SolrDBConst {
/**
* Name of the Solr versionTree core; the version tree stores open-source projects' version info and each version's file directory info
*/
String VERSION_TREE = "versionTree";
/**
* Suffix of the Solr _SourceFileInfo cores (open-source project source-code info), partitioned by the first character of the source file's MD5
*/
String CORE_NAME_SUFFIX_SOURCE_FILE_INFO = "_SourceFileInfo";
/**
* Solr SourceFileInfoTemp core, stores file line-feature info
*/
String CORE_NAME_SOURCE_FILE_INFO_TEMP = "SourceFileInfoTemp";
}

48
src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisLevelEnum.java

@@ -0,0 +1,48 @@
package com.keyware.composeanalysis.constant.enums;
/**
 * Analysis level enum
 */
public enum AnalysisLevelEnum {
//The database types have not been changed yet; when the database is updated this must change as well
//Detection feature level: 0 = file, 1 = function, 2 = code block, 3 = line
FILE_LEVEL("File level", 0),
FUNCTION_LEVEL("Function level", 1),
BLOCK_LEVEL("Code block level", 2),
LINE_LEVEL("Line level", 3);
// Status label
private String status;
//Status code
private Integer code;
AnalysisLevelEnum(String status, Integer code) {
this.status = status;
this.code = code;
}
public String getStatus() {
return status;
}
public Integer getCode() {
return code;
}
public static AnalysisLevelEnum getAnalysisLevelEnum(Integer code){
for (AnalysisLevelEnum analysisLevelEnum : AnalysisLevelEnum.values()) {
if(analysisLevelEnum.getCode().equals(code)){
return analysisLevelEnum;
}
}
return null;
}
}
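Editor's note: a one-line lookup sketch for the code-to-enum helper; unknown codes return null, so callers must null-check.

AnalysisLevelEnum level = AnalysisLevelEnum.getAnalysisLevelEnum(2); // BLOCK_LEVEL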

48
src/main/java/com/keyware/composeanalysis/constant/enums/AnalysisStatusEnum.java

@@ -0,0 +1,48 @@
package com.keyware.composeanalysis.constant.enums;
/**
 * Analysis status enum
 */
public enum AnalysisStatusEnum {
//0: not analyzed 1: analyzing 2: analysis done 3: paused 4: waiting 5: started 6: stopped 7: failed
UN_ANALYSIS("Not analyzed",0),
ANALYSISING("Analyzing",1),
ANALYSIS_DONE("Analysis done",2),
PAUSE_ANALYSIS("Analysis paused",3),
WAIT_ANALYSIS("Waiting for analysis",4),
START_ANALYSIS("Analysis started",5),
STOP_ANALYSIS("Analysis stopped",6),
FAIL_ANALYSIS("Analysis failed",7);
// Status label
private String status;
//Status code
private Integer code;
AnalysisStatusEnum(String status, Integer code) {
this.status = status;
this.code = code;
}
public String getStatus() {
return status;
}
public Integer getCode() {
return code;
}
}

42
src/main/java/com/keyware/composeanalysis/constant/enums/FileAnalysisStatusEnum.java

@@ -0,0 +1,42 @@
package com.keyware.composeanalysis.constant.enums;
/**
 * File analysis status enum
 */
public enum FileAnalysisStatusEnum {
//Legacy file analysis status (0: not analyzed; 1: file-level analysis done 2: file level 3: line-level feature extraction done 4: failed 5: paused 6: composition analysis done)
UN_START_ANALYSIS("Analysis not started",0),
FILE_ANALYSIS_DONE("File-level analysis done",1),
FAILED_ANALYSIS("Analysis failed",2),
PAUSE_ANALYSIS("Analysis paused",3),
ANALYSIS_DONE("Composition analysis done",4);
// Status label
private String status;
//Status code
private Integer code;
FileAnalysisStatusEnum(String status, Integer code) {
this.status = status;
this.code = code;
}
public String getStatus() {
return status;
}
public Integer getCode() {
return code;
}
}

119
src/main/java/com/keyware/composeanalysis/controller/ComposeAnalysisController.java

@@ -0,0 +1,119 @@
package com.keyware.composeanalysis.controller;
import com.keyware.common.constant.RedisConst;
import com.keyware.composeanalysis.api.ComposeAnalysisApi;
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.response.AnalysisResp;
import com.keyware.composeanalysis.service.AnalysisTaskService;
import com.keyware.composeanalysis.util.IpUtil;
import jakarta.annotation.Resource;
import lombok.extern.log4j.Log4j2;
import org.redisson.api.RLock;
import org.redisson.api.RedissonClient;
import org.springframework.web.bind.annotation.RestController;
/**
* @author liuzongren
* @date 2024/7/30
*/
@Log4j2
@RestController
public class ComposeAnalysisController implements ComposeAnalysisApi {
@Resource
private AnalysisTaskService taskService;
@Resource
private RedissonClient redissonClient;
@Override
public AnalysisResp startComposeAnalysisTask(String taskId) {
AnalysisResp result = new AnalysisResp();
result.setNodeIp(IpUtil.getHostIp());
RLock lock = redissonClient.getLock(String.format(RedisConst.TASK_LOCK_KEY_PREFIX, taskId));
try {
//Acquire the task lock first, to prevent multiple nodes from analyzing the same task concurrently
if (lock.tryLock()) {
AnalysisTask analysisTask = taskService.getById(taskId);
//Verify the task exists
if (analysisTask == null) {
result.setCode(202);
result.setResponseMsg("Task does not exist");
return result;
}
if (analysisTask.getAnalysisStatus().equals(AnalysisStatusEnum.ANALYSIS_DONE.getCode())) {
result.setCode(202);
result.setResponseMsg("Task has already been analyzed; to run it again, use re-analyze");
return result;
}
//Execute the composition analysis task
result.setCode(200);
taskService.doComposeAnalyze(analysisTask);
result.setResponseMsg("Task started successfully");
}
} catch (Exception e) {
result.setCode(500);
result.setResponseMsg("Task execution failed");
log.error("Task execution failed", e);
} finally {
//Release the lock only if this thread actually holds it (tryLock may have failed), and never unlock twice
if (lock.isHeldByCurrentThread()) {
lock.unlock();
}
}
return result;
}
@Override
public void stopComposeAnalysisTask(String taskId) {
taskService.stopComposeAnalysisTask(taskId);
}
@Override
public AnalysisResp restartComposeAnalysisTask(String taskId) {
AnalysisResp result = new AnalysisResp();
result.setNodeIp(IpUtil.getHostIp());
AnalysisTask analysisTask = taskService.getById(taskId);
//Verify the task exists
if (analysisTask == null) {
result.setCode(202);
result.setResponseMsg("Task does not exist");
return result;
}
Boolean isRestart = taskService.restartComposeAnalysisTask(taskId);
if (isRestart) {
result.setCode(200);
result.setResponseMsg("Task restarted successfully");
} else {
result.setCode(202);
result.setResponseMsg("Task restart failed");
}
return result;
}
@Override
public AnalysisResp recoveryComposeAnalysisTask(String taskId) {
AnalysisResp result = new AnalysisResp();
result.setNodeIp(IpUtil.getHostIp());
AnalysisTask analysisTask = taskService.getById(taskId);
//Verify the task exists
if (analysisTask == null) {
result.setCode(202);
result.setResponseMsg("Task does not exist");
return result;
}
taskService.recoveryComposeAnalysisTask(analysisTask);
result.setCode(200);
result.setResponseMsg("Task recovered successfully");
return result;
}
}

157
src/main/java/com/keyware/composeanalysis/entity/AnalysisTask.java

@@ -0,0 +1,157 @@
package com.keyware.composeanalysis.entity;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import java.io.Serializable;
import java.util.Date;
/**
* <p>
* Composition analysis task entity
* </p>
*
* @author liuzongren
* @since 2024-07-23
*/
@Data
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
@TableName("analysis_task")
public class AnalysisTask implements Serializable {
private static final long serialVersionUID = 1L;
/**
* Logical primary key ID (UUID)
*/
@TableId("id")
private String id;
/**
* Name of the artifact under test (the uploaded file name)
*/
@TableField("file_name")
private String fileName;
/**
* Version of the artifact under test
*/
@TableField("version")
private String version;
/**
 * Composition analysis level
 * Detection feature level: 0 = file, 1 = function, 2 = code block, 3 = line
*/
@TableField("analysis_level")
private Integer analysisLevel;
/**
* Open-source rate threshold; above it the current file is judged open source
*/
@TableField("open_rate_threshold")
private Integer openRateThreshold;
/**
* Whether the artifact under test is open source
*/
@TableField("open_type")
private Boolean openType;
/**
* MD5 of the artifact under test
*/
@TableField("md5")
private String md5;
/**
* Total file count of the artifact under test
*/
@TableField("file_count")
private Integer fileCount;
/**
* Composition analysis status
*/
@TableField("analysis_status")
private Integer analysisStatus;
/**
* Analysis start time
*/
@TableField("analysis_start_time")
private Date analysisStartTime;
/**
* Analysis end time
*/
@TableField("analysis_end_time")
private Date analysisEndTime;
/**
* Whether composition analysis is finished (0: not finished 1: in progress 2: finished)
*/
@TableField("compose_flag")
private Integer composeFlag;
/**
* Whether component analysis is finished (0: not finished 1: in progress 2: finished)
*/
@TableField("assembly_flag")
private Integer assemblyFlag;
/**
* Whether vulnerability analysis is finished (0: not finished 1: in progress 2: finished)
*/
@TableField("hold_flag")
private Integer holdFlag;
/**
* Whether license analysis is finished (0: not finished 1: in progress 2: finished)
*/
@TableField("licence_flag")
private Integer licenceFlag;
/**
* Whether decompression is finished (false: not decompressed, true: decompressed)
*/
@TableField("decompression_flag")
private Boolean decompressionFlag = false;
/**
* Task creation time
*/
@TableField("create_time")
private Date createTime;
/**
* ID of the user who created the task
*/
@TableField("create_user_id")
private String createUserId;
/**
 * Total analysis time
 * Format: --
*/
@TableField(exist = false)
private String analysisUsedTime;
/**
 * Analysis progress
 * 100%
*/
@TableField(exist = false)
private String analysisProgress;
}

18
src/main/java/com/keyware/composeanalysis/mapper/AnalyzeTaskMapper.java

@@ -0,0 +1,18 @@
package com.keyware.composeanalysis.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.keyware.composeanalysis.entity.AnalysisTask;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.io.Serializable;
import java.util.List;
/**
* @author liuzongren
* @since 2024-07-23
*/
@Mapper
public interface AnalyzeTaskMapper extends BaseMapper<AnalysisTask> {
}

37
src/main/java/com/keyware/composeanalysis/mongo/AnalysisLogMongoDto.java

@@ -0,0 +1,37 @@
package com.keyware.composeanalysis.mongo;
import cn.hutool.core.date.DateTime;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.io.Serializable;
/**
* @author liuzongren
* @ClassName AnalysisLogMongoDto
* @description: Analysis process log record
* @datetime 2024年 07月 23日 18:05
* @version: 1.0
*/
@Data
@Accessors(chain =true)
@Document(collection = "analysis_log")
public class AnalysisLogMongoDto implements Serializable {
@Id
private String id;
/**
* Log content
*/
private String logInfo;
/**
* Creation time
*/
private DateTime createTime;
}

96
src/main/java/com/keyware/composeanalysis/mongo/AssemblyMongoDto.java

@@ -0,0 +1,96 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.util.ArrayList;
import java.util.List;
/**
 * AssemblyMongoDto wraps the assembly data documents stored in MongoDB.
 * Mapped to the MongoDB collection named "assembly_data".
*
* @author liuzongren
* @date 2024/7/9
*/
@Document(collection = "assembly_data")
@Data
public class AssemblyMongoDto {
/**
 * Unique identifier of the MongoDB document
 */
@Id
private String id;
/**
 * Name of the file referenced by the project
 */
private String fileName;
/**
 * Name of the matched open-source component
 */
private String assemblyName;
/**
 * Version of the matched open-source component
 */
private String assemblyVersion;
/**
 * Source URL of the component
 */
private String url;
/**
 * Open-source rate; a measure of how open source the component is
 */
private String semblance;
/**
 * License list describing the component's license agreements
 */
private List<String> license;
/**
 * Task ID, linking to the analysis task
 */
private String testFileId;
/**
 * File ID, linking to a specific file
 */
private String fileId;
/**
 * Parent node number, used to build the tree structure
 */
private int parentNode;
/**
 * Level of the node in the tree
 */
private int layer;
/**
 * Number of the current node
 */
private int node;
/**
 * Child nodes, used to build the tree structure
 */
private List<AssemblyMongoDto> children = new ArrayList<>();
/**
 * Adds a child node
 *
 * @param child the child node to add
 */
public void addChild(AssemblyMongoDto child) {
children.add(child);
}
}

106
src/main/java/com/keyware/composeanalysis/mongo/FileDataMongoDto.java

@@ -0,0 +1,106 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.io.Serializable;
import java.util.Date;
@Document(collection = "file_data")
@Data
@Accessors(chain =true)
public class FileDataMongoDto implements Serializable {
@Id
private String id;
//File name
private String name;
//File MD5
private String md5;
/**
 * Scenario 1: assigned after a successful file-level match
 */
//File feature MD5
@Deprecated
private String cutMd5;
//File size in bytes
private Integer fileSize;
//Number of code lines
private Integer codeRowNum;
//Function count; appears unused, verify and remove later
@Deprecated
private Integer funCount;
//Parent directory ID
private String pId;
//Directly computed file hash; currently unused
@Deprecated
private String sourceMd5;
//Language
@Deprecated
private String lang;
//Military code flag (1: military code; 2: civilian code)
@Deprecated
private String militaryType;
//Whether this is a directory (0: false; 1: true)
private Boolean isDirectory;
//File analysis status (0: not analyzed; 1: file-level analyzed with a match 2: no file-level match, needs line-level extraction 3: line-level extraction done 4: failed 5: paused 6: composition analysis done)
private Integer fileAnalysisStatus;
//Associated file ID
@Deprecated
private String testFileId;
//Creation time
private Date createTime;
//File path
private String fileUrl;
//File suffix
private String suffix;
//Whether component analysis is finished (0: no 1: yes)
private String assFlag;
//Whether license analysis is finished (0: no 1: yes)
private String licenceFlag;
//Whether vulnerability analysis is finished (0: no 1: yes)
private String holdFlag;
//File open-source rate
private Float openRate = 0f;
/**
 * Scenario 1: on a file-level match (by feature MD5 or source MD5), set directly to the file's total line count
 */
//Number of open-source code lines in the file
private Integer openLineCount = 0;
//File open-source type (false: self-developed, true: open source)
private Boolean openType = false;
//Whether this file is currently being analyzed
//Acts as a second file-analysis-status field; 0: just decompressed
/**
 * Scenario 1: assigned 0 right after decompression
 * Scenario 2: assigned 1 when file-level analysis finished without a result and the next level of analysis is needed
 */
@Deprecated
private String isSelect;
}

56
src/main/java/com/keyware/composeanalysis/mongo/LineDataMongoDto.java

@@ -0,0 +1,56 @@
package com.keyware.composeanalysis.mongo;
import com.keyware.keyswan.common.LineModel;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.util.List;
/**
* @author liuzongren
* @date 2024/7/25
*/
@Document(collection = "line_data")
@Data
@Accessors(chain =true)
public class LineDataMongoDto {
/**
 * Unique identifier of the MongoDB document
 */
@Id
private String id;
/**
 * File ID
 */
private String fileId;
/**
 * Status
 * Scenario 1: assigned 0 when newly created
 */
@Deprecated
private Integer status;
/**
 * Selected status (?)
 * Scenario 1: assigned false when newly created
 */
@Deprecated
private Boolean isSelect;
/**
 * Feature MD5 values of this file's lines
 */
private List<String> lineFeatueMd5s;
/**
 * MD5 values of this file's code blocks
 */
private List<LineModel> lineModels;
}

62
src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFile.java

@@ -0,0 +1,62 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import java.io.Serializable;
import java.util.List;
/**
* @author liuzongren
* @ClassName MatchOpenFile
* @description: Matched open-source file info
* @datetime 2024年 07月 23日 18:05
* @version: 1.0
*/
@Data
@Accessors(chain =true)
public class MatchOpenFile implements Serializable {
//ID
@Id
private String id;
//Open-source project version name
private String version;
//Component version id
private String versionId;
//Open-source project name
private String pName;
//Open-source project id
private String pId;
//Feature similarity with the file under test
private Float featureSimilarity;
//Open-source URL
private String sourceUrl;
//Full path of the open-source file
private String sourceFilePath;
//Open-source license types
private List<String> licenseType;
//Length (file size)
private Integer fileSize;
//File MD5
private String md5;
//Analysis type (0 = file, 1 = function, 2 = code block, 3 = line)
private int analyzeType;
//File open-source rate
private Float openRate;
}

58
src/main/java/com/keyware/composeanalysis/mongo/MatchOpenFileMongoDto.java

@@ -0,0 +1,58 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.io.Serializable;
import java.util.List;
/**
* @author liuzongren
* @ClassName MatchOpenFile
* @description: File open-source-rate match info; the composition analysis collection
* @datetime 2024年 07月 23日 18:05
* @version: 1.0
*/
@Data
@Document(collection = "match_open_file")
@Accessors(chain =true)
public class MatchOpenFileMongoDto implements Serializable {
@Id
private String id;
/**
 * File name
 */
private String fileName;
/**
 * File path
 */
@Deprecated
private String filePath;
/**
 * Whether this file is open source (false: not open source, true: open source)
 */
private Boolean openType;
/**
 * Open-source rate of this file
 */
private float openRate;
/**
 * Feature similarity
 */
private Float featureSimilarity;
/**
 * Matched open-source file info
 */
List<MatchOpenFile> matchOpenFile;
}

50
src/main/java/com/keyware/composeanalysis/mongo/MatchOpenProjectMongoDto.java

@@ -0,0 +1,50 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.io.Serializable;
import java.util.List;
/**
* @author liuzongren
* @ClassName MatchOpenProjectMongoDto
* @description: Matched open-source project info
* @datetime 2024年 07月 26日 18:05
* @version: 1.0
*/
@Data
@Document(collection = "match_open_project")
@Accessors(chain =true)
public class MatchOpenProjectMongoDto implements Serializable {
@Id
private String id;
//Open-source project version
private String version;
//Open-source project id
private String projectId;
//Open-source project name
private String projectName;
//File count of the open-source project
private Integer projectFileNum;
//Number of matched open-source project files
private Integer matchFileNum;
//Open-source URL
private String sourceUrl;
//MD5 values of the matched open-source project files
private List<String> matchFilesMd5;
//Similarity with the open-source project: matchFileNum / projectFileNum
private Double similarity;
}

75
src/main/java/com/keyware/composeanalysis/mongo/ProjectAssemblyMongoDto.java

@@ -0,0 +1,75 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import lombok.experimental.Accessors;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import java.util.List;
/**
 * Mapped to the MongoDB collection named "project_assembly"
 * <p>
 * Info about the open-source projects matched by the current project
*
* @author liuzongren
* @date 2024/7/9
*/
@Document(collection = "project_assembly")
@Data
@Accessors(chain = true)
public class ProjectAssemblyMongoDto {
/**
 * Unique identifier of the MongoDB document
 */
@Id
private String id;
/**
 * File count of the project
 */
private Integer fileCount;
/**
 * Number of matched open-source files
 */
private Integer matchFileCount;
/**
 * Version ID of the matched open-source project
 */
private String versionId;
/**
 * Version name of the matched open-source project
 */
private String versionName;
/**
 * Project number of the matched open-source project on the open-source site
 */
private String projectId;
/**
 * Name of the matched open-source project
 */
private String projectName;
/**
 * URL of the open-source project
 */
private String openSourceUrl;
/**
 * Overall similarity between the artifact under test and this project
 */
private Double semblance;
/**
 * Licenses of this open-source project
 */
private List<String> licenseType;
}

75
src/main/java/com/keyware/composeanalysis/mongo/ProjectBaseDataMongoDto.java

@@ -0,0 +1,75 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.io.Serializable;
/**
* <p>
* Project base information
* </p>
*
* @author liuzongren
* @since 2024-07-23
*/
@Data
@Document(collection = "PROJECTBASEDATA")
public class ProjectBaseDataMongoDto implements Serializable {
private static final long serialVersionUID = 1L;
@Id
private String id;
/**
 * Project id
 */
@Field("ID")
private String projectId;
/**
 * Project name
 */
@Field("NAME")
private String name;
/**
 * Project type
 */
@Field("TYPE")
private String type;
/**
 * Number of stars this open-source project has received
 */
@Field("STARS")
private String stars;
/**
 * Project description
 */
@Field("DESCRIBE")
private String describe;
/**
 * Project URL
 */
@Field("URL")
private String url;
/**
 * License type
 */
@Field("LICENSETYPE")
private String licenseType;
/**
 * Creation time
 */
@Field("CREATE_TIME")
private String createTime;
}

75
src/main/java/com/keyware/composeanalysis/mongo/VersionbasedataMongoDto.java

@@ -0,0 +1,75 @@
package com.keyware.composeanalysis.mongo;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.springframework.data.annotation.Id;
import org.springframework.data.mongodb.core.mapping.Document;
import org.springframework.data.mongodb.core.mapping.Field;
import java.io.Serializable;
/**
* <p>
* Project version information
* </p>
*
* @author liuzongren
* @since 2024-07-23
*/
@Data
@EqualsAndHashCode(callSuper = false)
@Document(collection = "VERSIONBASEDATA")
public class VersionbasedataMongoDto implements Serializable {
private static final long serialVersionUID = 1L;
/**
 * Primary key
 */
@Id
private String id;
/**
 * Version ID
 */
@Field("ID")
private String versionId;
/**
 * pid, the project ID
 */
@Field("PID")
private String projectId;
/**
 * Version name
 */
@Field("NAME")
private String versionName;
/**
 * Version download URL
 */
@Field("DOWNURL")
private String downloadUrl;
/**
 * Relative path of the project
 */
@Field("PATH")
private String path;
/**
 * Creation time
 */
@Field("CREATE_TIME")
private String createTime;
/**
 * Version description
 */
@Field("DESCRIBE")
private String description;
}

68
src/main/java/com/keyware/composeanalysis/schedule/AnalysisStatusSchedule.java

@@ -0,0 +1,68 @@
//package com.keyware.composeanalysis.schedule;
//
//import cn.hutool.core.date.DateUnit;
//import cn.hutool.core.date.DateUtil;
//import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
//import com.keyware.common.constant.enums.AnalysisStatusEnum;
//import com.keyware.composeanalysis.constant.MongoDBConst;
//import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
//import com.keyware.composeanalysis.entity.AnalysisTask;
//import com.keyware.composeanalysis.mongo.FileDataMongoDto;
//import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl;
//import com.keyware.composeanalysis.util.AnalysisLogUtil;
//import com.mongodb.client.MongoClient;
//import jakarta.annotation.Resource;
//import lombok.extern.log4j.Log4j2;
//import org.springframework.context.annotation.Configuration;
//import org.springframework.data.mongodb.core.MongoTemplate;
//import org.springframework.data.mongodb.core.query.Query;
//import org.springframework.scheduling.annotation.EnableScheduling;
//import org.springframework.scheduling.annotation.Scheduled;
//
//import java.util.List;
//
//import static org.springframework.data.mongodb.core.query.Criteria.where;
//
///**
// * Periodically checks whether analysis tasks have finished
// */
//@Log4j2
//@EnableScheduling
//@Configuration
//public class AnalysisStatusSchedule {
//
// @Resource
// private AnalysisTaskServiceImpl taskService;
//
// @Resource
// private MongoClient mongoClient;
//
// /**
// * Periodically queries the task table for finished analysis tasks and updates their status
// */
// @Scheduled(cron = "*/1 * * * * ?") // runs every second
// public void startTask() {
// //Query tasks currently undergoing composition analysis
// LambdaQueryWrapper<AnalysisTask> taskQueryWrapper = new LambdaQueryWrapper<>();
// taskQueryWrapper.eq(AnalysisTask::getAnalysisStatus, AnalysisStatusEnum.ANALYSISING.getCode());
// taskQueryWrapper.eq(AnalysisTask::getDecompressionFlag,true);
// taskQueryWrapper.eq(AnalysisTask::getComposeFlag,AnalysisStatusEnum.ANALYSISING.getCode());
// List<AnalysisTask> composeAnalysisTasks = taskService.list(taskQueryWrapper);
//
// //Iterate over the task statuses
// for (AnalysisTask composeTask : composeAnalysisTasks) {
// MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX+composeTask.getId());
//
// Query fileQuery = new Query(where("isDirectory").is(false)
// .and("fileAnalysisStatus").in(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode(),FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()));
// Long finishedAnalysisFileCount = mongoTemplate.count(fileQuery, FileDataMongoDto.class);
//
// //All files analyzed; mark the composition analysis as done
// if (finishedAnalysisFileCount.intValue() == composeTask.getFileCount()){
// composeTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
// taskService.updateById(composeTask);
// AnalysisLogUtil.insert(mongoTemplate,"Composition analysis finished, took: "+ DateUtil.between(composeTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"s");
// }
// }
// }
//}

56
src/main/java/com/keyware/composeanalysis/service/AnalysisTaskService.java

@@ -0,0 +1,56 @@
package com.keyware.composeanalysis.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.response.AnalysisResp;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PostMapping;
/**
* <p>
* Service interface
* </p>
*
* @author liuzongren
* @since 2024-07-23
*/
public interface AnalysisTaskService extends IService<AnalysisTask> {
/**
 * Runs composition analysis
 *
 * @param analysisTask the analysis task
 */
void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException;
/**
 * Stops or pauses an analysis task
 *
 * @param taskId task id
 * author liuzongren
 */
void stopComposeAnalysisTask(String taskId);
/**
 * Re-runs an analysis task
 *
 * @param taskId task id
 * @return whether the restart succeeded
 * author liuzongren
 */
Boolean restartComposeAnalysisTask(String taskId);
/**
 * Resumes an analysis task
 *
 * @param analysisTask the task
 * author liuzongren
 */
void recoveryComposeAnalysisTask(AnalysisTask analysisTask);
}

215
src/main/java/com/keyware/composeanalysis/service/impl/AnalysisTaskServiceImpl.java

@@ -0,0 +1,215 @@
package com.keyware.composeanalysis.service.impl;
import cn.hutool.core.date.DateUnit;
import cn.hutool.core.date.DateUtil;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.keyware.common.constant.RedisConst;
import com.keyware.common.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.MongoDBConst;
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mapper.AnalyzeTaskMapper;
import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
import com.keyware.composeanalysis.mongo.ProjectAssemblyMongoDto;
import com.keyware.composeanalysis.service.AnalysisTaskService;
import com.keyware.composeanalysis.task.*;
import com.keyware.composeanalysis.util.AnalysisLogUtil;
import com.keyware.composeanalysis.util.RedisUtil;
import com.keyware.composeanalysis.util.SolrUtils;
import com.mongodb.client.MongoClient;
import jakarta.annotation.Resource;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.core.task.TaskExecutor;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.core.query.Update;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import static org.springframework.data.mongodb.core.query.Criteria.where;
/**
* <p>
* Composition analysis service implementation
* </p>
*
* @author liuzongren
* @since 2024-07-23
*/
@Log4j2
@Service
public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, AnalysisTask> implements AnalysisTaskService {
@Resource
private MongoClient mongoClient;
@Resource
private SolrUtils solrUtils;
@Resource
private TaskExecutor taskExecutor;
@Resource
private RedisUtil redisUtil;
@Override
@Async
public void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException {
long startTime = System.currentTimeMillis();
log.info("开始成份分析,taskName:{}",analysisTask.getFileName());
//校验文件压缩是否完成
retryGetDecompressionFlag(analysisTask);
//开始分析前,将成分分析的状态为 进行中
analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSISING.getCode());
this.updateById(analysisTask);
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
AnalysisLogUtil.insert(mongoTemplate, "【成分分析】开始:" + analysisTask.getFileName());
//首先进行项目级别的分析,将所有文件的源MD5批量去solr库中匹配
PorjectAnalysisTask projectAnalysisTask = new PorjectAnalysisTask(mongoClient, analysisTask, solrUtils, this);
projectAnalysisTask.doAnalysis();
//项目级的分析完成后,没有匹配中的文件,根据分析的级别,对每个文件进行相应级别的分析
analysisFile(mongoTemplate,analysisTask);
//成份分析完成后,查询所有开源文件,判断当前项目是否开源
checkProjectIfOpen(mongoTemplate,analysisTask);
//修改成分分析状态为完成
analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
this.updateById(analysisTask);
//插入分析日志
AnalysisLogUtil.insert(mongoTemplate,"【成分分析】已完成,耗时:"+ DateUtil.between(analysisTask.getAnalysisStartTime(),DateUtil.date(), DateUnit.SECOND) +"秒");
log.info("成份分析完成,taskName:{},耗时:{}",analysisTask.getFileName(),(System.currentTimeMillis()-startTime)/1000 +"秒");
}
@Override
public void stopComposeAnalysisTask(String taskId) {
//Set the task's running-status flag to paused so queued tasks in the thread pool stop analyzing
redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, taskId), AnalysisStatusEnum.ANALYSIS_PAUSED.getCode());
}
@Override
public Boolean restartComposeAnalysisTask(String taskId) {
boolean result = false;
try {
//Remove the matched open-source project info
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + taskId);
mongoTemplate.remove(new Query(), ProjectAssemblyMongoDto.class);
//Remove the open-source files matched by the project
mongoTemplate.remove(new Query(), MatchOpenFileMongoDto.class);
//Reset the file analysis status to not-started
mongoTemplate.update(FileDataMongoDto.class)
.apply(new Update().set("openType", false)
.set("fileAnalysisStatus", FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode()))
.all();
//Restart the analysis task
doComposeAnalyze(getById(taskId));
result = true;
} catch (Exception e) {
log.error("Re-analysis failed", e);
}
return result;
}
@Override
@Async
public void recoveryComposeAnalysisTask(AnalysisTask analysisTask) {
/**
 * todo There is a logic flaw here: project-level analysis cannot be interrupted. Recovery resumes the file-level
 * composition analysis, so if file-level analysis had not finished, this may re-run file-level analysis on all files.
 */
try {
//Set the task's running-status flag back to in-progress
redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()), AnalysisStatusEnum.ANALYSISING.getCode());
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
//After project-level analysis has completed
Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").ne(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode())
.and("isDirectory").is(false));
List<FileDataMongoDto> unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class);
if (CollectionUtils.isNotEmpty(unAnalyzedFiles)){
//Analyze the files in parallel using the thread pool
CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size());
unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> {
IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch);
taskExecutor.execute(task);
});
countDownLatch.await();
//修改成分分析状态为完成
analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
this.updateById(analysisTask);
AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ DateUtil.between(analysisTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒");
}
} catch (Exception e) {
log.error("恢复分析失败", e);
}
}
//decompression of the upload can be slow, so poll for completion: up to 60 retries, one per second (~60s)
private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) {
int retryCount = 0;
while (retryCount < 60) {
AnalysisTask latestAnalysisTask = this.getById(analysisTask.getId());
if (latestAnalysisTask.getDecompressionFlag()) {
analysisTask.setDecompressionFlag(true);
analysisTask.setFileCount(latestAnalysisTask.getFileCount());
return true;
}
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
//restore the interrupt flag so callers can react to it
Thread.currentThread().interrupt();
log.error("Sleep interrupted while waiting for decompression", e);
}
retryCount++;
}
return false;
}
//run the per-file analysis at the configured level
private void analysisFile(MongoTemplate mongoTemplate, AnalysisTask analysisTask) throws InterruptedException {
Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").is(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode())
.and("isDirectory").is(false));
List<FileDataMongoDto> unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class);
//analyze the files in parallel on the thread pool
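//the latch is sized to the number of files; every task counts down in its finally block,
//so await() returns only after all file-level tasks have finished or failed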
CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size());
unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> {
IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch);
taskExecutor.execute(task);
});
countDownLatch.await();
}
//check whether the project as a whole counts as open source
private void checkProjectIfOpen(MongoTemplate mongoTemplate, AnalysisTask analysisTask){
Query openFileQuery = new Query(where("openType").is(true));
Long openFilesCount = mongoTemplate.count(openFileQuery, FileDataMongoDto.class);
//threshold above which the project is considered open source
Integer openRateThreshold = analysisTask.getOpenRateThreshold();
BigDecimal totalFileCount = new BigDecimal(analysisTask.getFileCount());
//compute the open-source ratio as a percentage
BigDecimal openRate = new BigDecimal(openFilesCount).divide(totalFileCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
//at or above the threshold the project is marked open source
if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
analysisTask.setOpenType(true);
}
}
}

@@ -0,0 +1,56 @@
package com.keyware.composeanalysis.solr;
import lombok.Data;
import lombok.experimental.Accessors;
import java.util.Arrays;
import java.util.List;
/**
* @author liuzongren
* @date 2024/7/26
 * @Description solr VersionTree info; holds the overall project information
*/
@Data
@Accessors(chain = true)
public class VersionTree {
/**
 * Project ID
 */
private String proId;
/**
 * Project name
 */
private String proName;
/**
 * Version ID
 */
private String versionId;
/**
 * Version name
 */
private String versionName;
/**
 * Project download URL
 */
private String downUrl;
/**
 * License types; stored in solr as a single string delimited by "@@@"
 */
private List<String> licenseType;
public void setLicenseType(String licenseType) {
if (licenseType != null){
this.licenseType = Arrays.asList(licenseType.split("@@@"));
}
}
/**
 * Information about every file in the project
*/
private List<VersionTreeNode> dirTree;
}

@@ -0,0 +1,42 @@
package com.keyware.composeanalysis.solr;
import lombok.Data;
/**
* @author liuzongren
* @date 2024/7/26
*/
@Data
public class VersionTreeNode {
/**
 * Node ID
 */
private String id;
/**
 * Parent node ID
 */
private String pid;
/**
 * File name
 */
private String name;
/**
 * Whether this node is a parent (i.e. a directory)
 */
private Boolean isParent;
/**
 * Source file MD5
 */
private String sourceFileMd5;
/**
 * File path relative to the project root
 */
private String fullPath;
}

@@ -0,0 +1,45 @@
package com.keyware.composeanalysis.task;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import org.springframework.data.mongodb.core.MongoTemplate;
import java.util.concurrent.CountDownLatch;
/**
 * @author liuzongren
 * @date 2024/7/31
 * @description factory that creates the concrete analysis task for the configured analysis level
 */
public class AnalysisTaskFactory {
/**
 * Create the concrete analysis task matching the task's analysis level
 * @param analysisTask
 * @param analysisFile
 * @param mongoTemplate
 * @param countDownLatch latch used to track outstanding tasks
 * @return
 */
public static IAnalysisTask createAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
AnalysisLevelEnum analysisLevel = AnalysisLevelEnum.getAnalysisLevelEnum(analysisTask.getAnalysisLevel());
switch (analysisLevel) {
case FILE_LEVEL:
return new FileAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
case FUNCTION_LEVEL:
return new FunctionAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
case BLOCK_LEVEL:
return new CodeBlockAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
case LINE_LEVEL:
return new LineAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch);
default:
break;
}
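//unknown analysis level: fall through and return null (callers currently assume a non-null task)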
return null;
}
}

@@ -0,0 +1,356 @@
package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.lang.Pair;
import com.alibaba.fastjson.JSONArray;
import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst;
import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.SolrDBConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.LineDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFile;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.util.*;
import com.keyware.keyswan.anaysis.Analysis;
import com.keyware.keyswan.anaysis.AnalysisFactory;
import com.keyware.keyswan.common.CodeFile;
import com.keyware.keyswan.common.LineModel;
import com.keyware.utils.IdGenerator;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Update;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.keyware.composeanalysis.util.SimilarityUtil.getOpenRateAndSaveRowNum;
import static org.springframework.data.mongodb.core.query.Criteria.where;
/**
 * @author liuzongren
 * @ClassName CodeBlockAnalysisTask
 * @description: code-block-level provenance analysis task
 * @datetime 2024-07-25 16:19
 * @version: 1.0
 */
@Log4j2
public class CodeBlockAnalysisTask extends IAnalysisTask {
private MongoTemplate mongoTemplate;
private AnalysisTask analysisTask;
//file info of the artifact under test
private FileDataMongoDto analysisFile;
private SolrUtils solrUtils;
private RedisUtil redisUtil;
private CountDownLatch countDownLatch;
public CodeBlockAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
this.mongoTemplate = mongoTemplate;
this.analysisTask = analysisTask;
this.analysisFile = analysisFile;
this.countDownLatch = countDownLatch;
this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
}
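//note: tasks are created by AnalysisTaskFactory rather than by Spring, so collaborators
//are pulled from the application context manually in the constructor above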
/**
 * Code-block-level source provenance analysis.
 * This task must run after the file-level analysis has completed.
*/
@Override
public void run() {
//before executing, check whether the task has been stopped or paused
Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
log.info("Task cancelled, fileName:{}", analysisFile.getName());
countDownLatch.countDown();
return;
}
//file path
String filePath = analysisFile.getFileUrl();
//file name
String fileName = analysisFile.getName();
try {
LineDataMongoDto lineDataMongoDto = new LineDataMongoDto();
lineDataMongoDto.setFileId(analysisFile.getId());
Analysis analysis = AnalysisFactory.getAnalysis(filePath);
//extract the file's code-block features
CodeFile codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT_BY_6_LINE);
List<LineModel> lineFeatures = codeFile.getLine_hay();
//pick the solr feature core to query based on the file suffix
String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
//fetch feature-similar files from solr
SolrDocumentList matchOpenSourceFiles = getFeatureSimilarityFromSolr(featureCoreName, lineFeatures);
//compute the open-source ratio
doAnalysis(matchOpenSourceFiles, codeFile);
//update the file's analysis status: code-block-level features fully analyzed
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.replaceWith(analysisFile)
.findAndReplace();
AnalysisLogUtil.insert(mongoTemplate, "[Code-block-level analysis] finished " + fileName);
log.info("File {}: code-block-level analysis finished", fileName);
} catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "[Code-block-level analysis] failed " + fileName, e);
log.error("File " + fileName + ": code-block-level analysis failed!", e);
//mark the file's analysis status as failed
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
.first();
} finally {
countDownLatch.countDown();
}
}
/**
 * Retrieve feature-similar files from the feature library by their feature MD5 values
 *
 * @param solrCoreName solr core to search
* @param functionAndCodeBlockInfos
* @return
*/
private SolrDocumentList getFeatureSimilarityFromSolr(String solrCoreName, List<LineModel> functionAndCodeBlockInfos) {
//collect the feature MD5s and cut-line MD5s of the functions / code blocks
Set<String> traitLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getTraitLineMd5).collect(Collectors.toSet());
Set<String> cutLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getCutLineMd5).collect(Collectors.toSet());
Set<String> queryMd5Arr = Stream.concat(traitLineMd5Arr.stream(), cutLineMd5Arr.stream()).collect(Collectors.toSet());
String queryStr = "line_hay:(" + StringUtils.join(queryMd5Arr, " OR ") + ")";
log.info("Query: solrCoreName:{}, queryStr:{}", solrCoreName, queryStr);
SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,line_hay");
log.info("Query result: {}", result);
return result;
}
/**
 * Compute the open-source ratio of the artifact under test
 *
 * @param matcheOpenSourceFiles matched open-source file info
 * @param fileAnalysisRes parse result of the artifact under test
*/
private void doAnalysis(SolrDocumentList matcheOpenSourceFiles, CodeFile fileAnalysisRes) {
if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) {
return;
}
//pick the source-file version core to query based on the file suffix
String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
//all matched feature code-block MD5s, used to compute the total matched line count
Set<String> matchingTraitLineSet = new HashSet<>();
//all matched source line numbers
Set<Integer> matchedLineRowsNum = new HashSet<>();
//compute each open-source file's similarity and open-source ratio
List<MatchOpenFile> matchOpenFilesRes = calculateSimilarityAndOpenRate(matcheOpenSourceFiles, fileAnalysisRes, sourceFileBaseCoreName, matchedLineRowsNum, matchingTraitLineSet);
//overall feature similarity of the file; keep the first entry when feature MD5s collide
Map<String, LineModel> traitMd5Map = fileAnalysisRes.getLine_hay().stream().collect(Collectors.toMap(LineModel::getTraitLineMd5, java.util.function.Function.identity(), (a, b) -> a));
int matchCodeBlockLineCount = 0;
for (String matchFeatureFunctionMd5 : matchingTraitLineSet) {
LineModel lineModel = traitMd5Map.get(matchFeatureFunctionMd5);
//start and end lines are inclusive, hence the +1 (matching the per-file counting in calculateSimilarityAndOpenRate)
matchCodeBlockLineCount += (Integer.valueOf(lineModel.getEndLine()) - Integer.valueOf(lineModel.getStartLine()) + 1);
}
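//e.g. a matched block spanning lines 10..15 contributes 6 lines (inclusive bounds)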
BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//overall open-source ratio of the file
BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//open-source ratio threshold
Integer openRateThreshold = analysisTask.getOpenRateThreshold();
//if the open-source ratio reaches the threshold, mark the file as open source
if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
analysisFile.setOpenType(true);
}
//persist the file's open-source info to mongo
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType())
.setMatchOpenFile(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo);
}
/**
 * Compute the feature similarity and open-source ratio against each matched open-source file
 *
 * @param matchOpenFiles all open-source files matched by MD5
 * @param sourceFileBaseCoreName solr core holding this file type's version info
 * @param matchLineRowsNum accumulator for matched open-source line numbers
 * @param matchFeatureCodeBlockMd5s accumulator for matched feature code-block MD5s
*/
private List<MatchOpenFile> calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set<Integer> matchLineRowsNum, Set<String> matchFeatureCodeBlockMd5s) {
List<MatchOpenFile> matchOpenFilesRes = new ArrayList<>();
//first look up the version IDs and paths of the open-source files by their MD5s
Set<String> openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet());
Map<String, SolrDocument> md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s);
//then fetch the detailed version info by version ID
//todo fetching one version's info hits two solr cores plus versionTree; needs optimizing later
Set<String> openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));
for (SolrDocument matchFile : matchOpenFiles) {
//MD5 of the open-source file
String openSourceFileMd5 = matchFile.getFieldValue("sourceMd5").toString();
//parse the open-source file's code-block feature values
List<LineModel> openFileCodeBlockFeatureList = getOpenFileCodeBlockList(matchFile);
//total number of matched feature lines
int currentFileMatchFeatureLineCount = 0;
//walk this file's code-block features and add up the matched lines
for (LineModel lineModel : fileAnalysisRes.getLine_hay()) {
String traitLineMd5 = lineModel.getTraitLineMd5();
//compare against the matched file's line info
for (LineModel matchLine : openFileCodeBlockFeatureList) {
if (traitLineMd5.equals(matchLine.getTraitLineMd5())) {
//accumulate the matched feature lines (start and end are inclusive)
currentFileMatchFeatureLineCount += (Integer.valueOf(matchLine.getEndLine()) - Integer.valueOf(matchLine.getStartLine()) + 1);
matchFeatureCodeBlockMd5s.add(traitLineMd5);
}
}
}
//the first character of the source MD5 selects the source-code core to query
String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
//fetch the open-source file's text content
SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent");
//open-source ratio against this file
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent.getFieldValue("sourceContent").toString());
//cache the matched line numbers so the overall open-source ratio can be computed later
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
//feature similarity against this file (assuming getCodeRowNum() returns a primitive line count, as elsewhere)
BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(new BigDecimal(fileAnalysisRes.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
//assemble the open-source project info for this matched file
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setSourceUrl((String) openEntries.get("fullPath"))
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setVersion(versionInfo.getVersionName())
.setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.BLOCK_LEVEL.getCode());
matchOpenFilesRes.add(matchOpenFileInfo);
}
return matchOpenFilesRes;
}
/**
 * Parse the code-block feature values of the given open-source file
*
* @param openSourceFile
* @return
*/
private List<LineModel> getOpenFileCodeBlockList(SolrDocument openSourceFile) {
//the feature JSON is stored escaped; strip the escaping before parsing
String lineFeatureMd5s = (String) openSourceFile.get("line_hay");
lineFeatureMd5s = lineFeatureMd5s.replace("\\", "")
.replace("\"{", "{")
.replace("}\"", "}");
return JSONArray.parseArray(lineFeatureMd5s, LineModel.class);
}
/**
 * Insert the feature values into mongo
 *
 * @param features feature list
 * @param lineDataMongoDto feature storage for the current analysis task
 * todo check later whether this insert is needed at all
 * @param
 */
@Deprecated
private void insertFeatureValue(List<LineModel> features, LineDataMongoDto lineDataMongoDto) {
List<LineModel> batchInsertList = new ArrayList<>();
if (CollectionUtil.isNotEmpty(features)) {
//insert in batches of 10; very large parse results could exceed MongoDB's document size limit
int batchInsertStep = 10;
int total = 0;
for (int i = 0; i < features.size(); i++) {
LineModel lineModel = features.get(i);
if (total != batchInsertStep) {
batchInsertList.add(lineModel);
total++;
}
if (i == features.size() - 1 && total != batchInsertStep) {
total = 0;
lineDataMongoDto.setId(IdGenerator.uuid32())
.setLineModels(batchInsertList);
mongoTemplate.insert(lineDataMongoDto);
}
if (total == batchInsertStep) {
total = 0;
lineDataMongoDto.setId(IdGenerator.uuid32())
.setLineModels(batchInsertList);
mongoTemplate.insert(lineDataMongoDto);
batchInsertList.clear();
}
}
} else {
lineDataMongoDto.setId(IdGenerator.uuid32());
mongoTemplate.insert(lineDataMongoDto);
}
}
}

@@ -0,0 +1,232 @@
package com.keyware.composeanalysis.task;
import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.SolrDBConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFile;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.util.*;
import com.keyware.keyswan.anaysis.Analysis;
import com.keyware.keyswan.anaysis.AnalysisFactory;
import com.keyware.keyswan.common.CodeFile;
import com.keyware.utils.IdGenerator;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Update;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.springframework.data.mongodb.core.query.Criteria.where;
/**
* @author liuzongren
* @date 2024/7/23
 * desc file-level provenance analysis task
*/
@Log4j2
public class FileAnalysisTask extends IAnalysisTask {
private MongoTemplate mongoTemplate;
private AnalysisTask analysisTask;
private SolrUtils solrUtils;
//file info of the artifact under test
private FileDataMongoDto analysisFile;
private RedisUtil redisUtil;
private CountDownLatch countDownLatch;
public FileAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
this.mongoTemplate = mongoTemplate;
this.analysisTask = analysisTask;
this.analysisFile = analysisFile;
this.countDownLatch = countDownLatch;
this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
}
/**
 * File-level provenance analysis.
 * Must run after the project-level analysis has completed.
 * The file's source MD5 found no match in solr, so its features are extracted and matched instead.
*/
@Override
public void run() {
//before executing, check whether the task has been stopped or paused
Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
log.info("Task cancelled, fileName:{}", analysisFile.getName());
countDownLatch.countDown();
return;
}
//current file name
String fileName = analysisFile.getName();
AnalysisLogUtil.insert(mongoTemplate, "[File-level analysis] analyzing " + fileName);
try {
//only mainstream languages can be parsed;
//files outside the 32 mainstream languages have no extractable features and were already matched by MD5 at file level
if (StringUtils.isNotEmpty(analysisFile.getSuffix()) && FixedValue.SUFFIX_SOLR_VERSION.containsKey(analysisFile.getSuffix())) {
//look up the *_CutFileInfo feature core for this suffix
String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
//get a parser for this file name
Analysis analysis = AnalysisFactory.getAnalysis(fileName);
//a null parser means feature extraction is not yet supported for this language; the file MD5 match already covered it
if (analysis != null) {
//skip file- and line-level feature extraction for files larger than 3MB
Integer fileSize = analysisFile.getFileSize();
if (fileSize < (3 * 1024 * 1024)) {
CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0");
//query the suffix-specific feature core with the file's feature values
if (codeFile != null) {
String querySb = "sourceMd5:" + codeFile.getSourceMd5() + " OR cutFileMd5:" + codeFile.getCutFileMd5() + " OR traitFileMd5:" + codeFile.getTraitFileMd5();
SolrDocumentList openSourceFileList = solrUtils.query(featureCoreName, querySb, "sourceMd5");
//if the file matched anything in the source library, compute its open-source ratio
if (CollectionUtils.isNotEmpty(openSourceFileList)) {
ananlyzeFileOpenRate(openSourceFileList);
}
}
}
}
}
//persist the file-level analysis result
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.replaceWith(analysisFile)
.findAndReplace();
} catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "[File-level analysis] extraction failed " + fileName, e);
log.error("File " + fileName + ": file-level feature extraction failed!", e);
//mark the file's analysis status as failed
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode());
//persist the failure status
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
.first();
} finally {
countDownLatch.countDown();
}
}
/**
 * Compute the file's open-source ratio
 *
 * @param fileList matched open-source file info
* @throws IOException
*/
private void ananlyzeFileOpenRate(SolrDocumentList fileList) throws IOException {
//result object holding the matched open-source file info
MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto();
matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFileName(analysisFile.getName())
.setFilePath(analysisFile.getFileUrl());
//look up version IDs by the MD5s of the matched open-source files
Set<String> sourceFileMd5 = fileList.stream().map(solrDocument -> (String) solrDocument.get("sourceMd5")).collect(Collectors.toSet());
String sourceCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceCoreName, sourceFileMd5);
//fetch the version details by version ID
Set<String> versionIds = md5VersionObjMap.values().stream().map(solrDocument -> (String) solrDocument.get("versionId")).collect(Collectors.toSet());
List<VersionTree> treeInfoList = solrUtils.queryBatchVersionInfoByVersionIds(versionIds);
Map<String, VersionTree> versionIdMap = treeInfoList.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity()));
//read the artifact's text content
String fileContent = new String(Files.readAllBytes(Paths.get(analysisFile.getFileUrl())), "utf-8").replaceAll(" ", "");
//split the content into lines for matching against the open-source text
List<String> fileLines = SimilarityUtil.getSplitWords(fileContent);
HashSet<Integer> openLineNum = new HashSet<>();
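//openLineNum accumulates matched line numbers across all open-source files,
//so a line matched by several files is counted only once in the overall ratio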
//result list of matched open-source files
ArrayList<MatchOpenFile> matchOpenFileList = new ArrayList<>();
//walk the matched open-source files
for (int i = 0; i < fileList.size(); i++) {
String openFileMd5 = (String) fileList.get(i).get("sourceMd5");
SolrDocument versionObj = md5VersionObjMap.get(openFileMd5);
String versionId = (String) versionObj.get("versionId");
VersionTree versionInfo = versionIdMap.get(versionId);
if (versionInfo == null) {
log.error("Version info not found in versionTree, openFileMd5:{}, versionId:{}", openFileMd5, versionId);
continue;
}
MatchOpenFile matchOpenFile = new MatchOpenFile();
matchOpenFile.setId(IdGenerator.uuid32())
.setVersionId(versionId)
.setSourceFilePath((String) versionObj.get("fullPath"))
.setSourceUrl(versionInfo.getDownUrl())
.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode())
.setVersion(versionInfo.getVersionName())
.setFeatureSimilarity(100.00f);
//compute the text similarity between the artifact and the open-source file;
//the first character of the MD5 selects the solr source-code core
String solrNameIndex = openFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
SolrDocumentList sourceFileInfo = solrUtils.query(solrNameIndex, "sourceFileMd5:" + openFileMd5, "sourceContent");
if (CollectionUtils.isNotEmpty(sourceFileInfo)) {
String openSourceContent = String.valueOf(sourceFileInfo.get(0).getFieldValue("sourceContent"));
//possible optimization: hoist the artifact's line split out of this loop
double similarity = SimilarityUtil.getSimilarityAndSaveRowNum(fileLines, openSourceContent, openLineNum);
matchOpenFile.setOpenRate(new BigDecimal(similarity * 100).setScale(2, RoundingMode.HALF_UP).floatValue());
} else {
//if the source text cannot be found, fall back to an open-source ratio of 100%
log.error("Source code not found, DBname:{}, sourceFileMd5:{}", solrNameIndex, openFileMd5);
matchOpenFile.setOpenRate(100.00f);
}
matchOpenFile.setMd5(openFileMd5);
matchOpenFileList.add(matchOpenFile);
}
//compute the artifact's overall open-source ratio
//and fetch the threshold used to decide whether the file counts as open source
Integer openRateThreshold = analysisTask.getOpenRateThreshold();
int openLineCount = openLineNum.size();
BigDecimal totalLineCount = new BigDecimal(fileLines.size());
BigDecimal openRate = new BigDecimal(openLineCount).divide(totalLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
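//e.g. 120 matched lines out of 400 total -> 0.3000 -> 30.00% open-source ratio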
//above the threshold the file is considered open source
if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) {
analysisFile.setOpenType(true);
} else {
analysisFile.setOpenType(false);
}
//update the artifact's file info
analysisFile.setOpenLineCount(openLineCount)
.setOpenRate(openRate.floatValue());
//assemble the open-source result; use the computed ratio here
//(the original hard-coded 100.00/0.00 based on openType discarded the computed value)
matchOpenFileInfo.setFilePath(analysisFile.getFileUrl())
.setOpenType(analysisFile.getOpenType())
.setOpenRate(openRate.floatValue())
.setMatchOpenFile(matchOpenFileList);
//persist the open-source result
mongoTemplate.insert(matchOpenFileInfo);
}
}

@@ -0,0 +1,409 @@
package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.ObjUtil;
import com.alibaba.fastjson.JSONArray;
import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.SolrDBConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.LineDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFile;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.util.*;
import com.keyware.keyswan.common.LineModel;
import com.keyware.keyware.anaysis.Analysis;
import com.keyware.keyware.anaysis.AnalysisFactory;
import com.keyware.keyware.common.CodeFile;
import com.keyware.keyware.common.Function;
import com.keyware.utils.IdGenerator;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Update;
import java.io.FileInputStream;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.springframework.data.mongodb.core.query.Criteria.where;
/**
 * @author liuzongren
 * @ClassName FunctionAnalysisTask
 * @description: function-level provenance analysis task
 * @datetime 2024-07-25 16:19
 * @version: 1.0
 */
@Log4j2
public class FunctionAnalysisTask extends IAnalysisTask {
private MongoTemplate mongoTemplate;
private AnalysisTask analysisTask;
//file info of the artifact under test
private FileDataMongoDto analysisFile;
private SolrUtils solrUtils;
private RedisUtil redisUtil;
private CountDownLatch countDownLatch;
public FunctionAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
this.mongoTemplate = mongoTemplate;
this.analysisTask = analysisTask;
this.analysisFile = analysisFile;
this.countDownLatch = countDownLatch;
this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
}
/**
 * Function-level source provenance analysis.
 * This task must run after the file-level analysis has completed.
*/
@Override
public void run() {
//before executing, check whether the task has been stopped or paused
Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
log.info("Task cancelled, fileName:{}", analysisFile.getName());
countDownLatch.countDown();
return;
}
//file path
String filePath = analysisFile.getFileUrl();
//file name
String fileName = analysisFile.getName();
try {
//pick the solr feature core to query based on the file suffix
String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
//pick the sourceFileBase core (per suffix) used to resolve file version info
String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
//get a function parser for this file name
Analysis analysis = AnalysisFactory.getAnalysis(filePath);
//parse the file
if (!ObjUtil.hasEmpty(featureCoreName, sourceFileBaseCoreName, analysis)) {
CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath));
if (codeFile != null) {
List<Function> functionList = codeFile.getFunctionList();
if (CollectionUtil.isNotEmpty(functionList)) {
//collect the functions' feature MD5s and cut MD5s
List<String> featureFunctionMd5List = functionList.stream().map(Function::getMd5).collect(Collectors.toList());
List<String> cutFunctionMd5List = functionList.stream().map(Function::getSourceMd5).collect(Collectors.toList());
Set<String> queryMd5List = Stream.concat(featureFunctionMd5List.stream(), cutFunctionMd5List.stream()).collect(Collectors.toSet());
String queryStr = "fun_hay:(" + StringUtils.join(queryMd5List, " OR ") + ")";
SolrDocumentList matchOpenFiles = solrUtils.query(featureCoreName, queryStr, "sourceMd5,fun_hay");
//if the function features matched open-source files, compare similarity against them
//(the original null check never failed for an empty result, so the fallback below was unreachable)
if (CollectionUtil.isNotEmpty(matchOpenFiles)) {
//analyze the matched files
doAnalysis(matchOpenFiles, sourceFileBaseCoreName, codeFile);
} else {
//the function feature library is sparse, so fall back to matching the whole file's MD5 against solr
checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5());
}
}
}
} else {
//if the file could not be parsed, fall back to matching the whole file's MD5 against solr
checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5());
}
//update the file's analysis status: function-level features fully analyzed
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.replaceWith(analysisFile)
.findAndReplace();
AnalysisLogUtil.insert(mongoTemplate, "[Function-level analysis] finished " + fileName);
log.info("File {}: function-level analysis finished", fileName);
} catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "[Function-level analysis] failed " + fileName, e);
log.error("File " + fileName + ": function-level feature extraction failed!", e);
//mark the file's analysis status as failed
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
.first();
} finally {
countDownLatch.countDown();
}
}
/**
 * Compare function-level text similarity
 *
 * @param matchOpenFiles open-source files matched via feature MD5s
 * @param sourceFileBaseCoreName solr core used to resolve open-source file version IDs
 * @param fileAnalysisRes function parse result of the artifact under test
* @throws Exception
*/
private void doAnalysis(SolrDocumentList matchOpenFiles, String sourceFileBaseCoreName, CodeFile fileAnalysisRes) throws Exception {
//group functions by feature MD5; getters, setters etc. produce duplicate feature values
Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));
//total number of function code lines
int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum();
//matched feature function MD5s
Set<String> matchFeatureFunctionMd5s = new HashSet<>();
//matched source line numbers
Set<Integer> matchOpenLineRowsNum = new HashSet<>();
//compute the open-source ratio and feature similarity against each open-source file
List<MatchOpenFile> matchOpenFilesRes = calculateSimilarityAndOpenRate(matchOpenFiles, fileAnalysisRes, sourceFileBaseCoreName, matchOpenLineRowsNum, matchFeatureFunctionMd5s);
//overall feature similarity of the file
int matchFunctionLineCount = 0;
for (String matchFeatureFunctionMd5 : matchFeatureFunctionMd5s) {
matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum();
}
BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//overall open-source ratio of the file
BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
//open-source ratio threshold
Integer openRateThreshold = analysisTask.getOpenRateThreshold();
//if the open-source ratio exceeds the threshold, mark the file as open source
if (openRate.floatValue() > openRateThreshold) {
analysisFile.setOpenType(true);
}
//persist the file's open-source info to mongo
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName())
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRate.floatValue())
.setOpenType(analysisFile.getOpenType())
.setMatchOpenFile(matchOpenFilesRes);
mongoTemplate.save(matchOpenFileMongo);
}
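//note: featureSimilarity is derived from matched function features, while openRate comes
//from the line-level text diff, so the two metrics can legitimately differ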
/**
 * Compute the feature similarity and open-source ratio against each matched open-source file
 *
 * @param matchOpenFiles all open-source files matched by MD5
 * @param sourceFileBaseCoreName solr core holding this file type's version info
 * @param matchLineRowsNum accumulator for matched open-source line numbers
 * @param matchFeatureFunctionMd5s accumulator for matched feature function MD5s
 * @return match results for the open-source files
*/
private List<MatchOpenFile> calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set<Integer> matchLineRowsNum, Set<String> matchFeatureFunctionMd5s) {
//matched open-source file results
List<MatchOpenFile> matchOpenFilesRes = new ArrayList<>();
//group functions by feature MD5; getters, setters etc. produce duplicate feature values
Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));
//first look up the version IDs and paths of the open-source files by their MD5s
Set<String> openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet());
Map<String, SolrDocument> md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s);
//then fetch the detailed version info by version ID
//todo fetching one version's info hits two solr cores plus versionTree; needs optimizing later
Set<String> openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));
//total function line count
BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());
for (SolrDocument openSourceFile : matchOpenFiles) {
//MD5 of the open-source file
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString();
//parse the open-source file's function feature values
List<Function> openFileFunctionList = getOpenFileFunctionList(openSourceFile);
//the first character of the source MD5 selects the source-code core to query
String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;
//fetch the open-source file's text content
SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent");
//total matched feature function lines for this file
int currentFileMatchFeatureLineCount = 0;
//feature function MD5s matched by this file
Set<String> currentFileMatchFeatureFunctionMd5 = new HashSet<>();
//walk the feature MD5s of the artifact's functions
for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) {
List<Function> currentFeatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5);
//feature function list of the open-source file
for (Function openFunction : openFileFunctionList) {
if (funFeatureMd5.equals(openFunction.getMd5())) {
//count each feature function only once, otherwise the overall feature similarity is skewed;
//on a match, all functions sharing the same feature lines are added together
if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) {
currentFileMatchFeatureFunctionMd5.add(funFeatureMd5);
matchFeatureFunctionMd5s.add(funFeatureMd5);
currentFileMatchFeatureLineCount += currentFeatureFunctionList.stream().mapToInt(Function::getCodeRowNum).sum();
}
}
}
}
//open-source ratio against this file
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString());
//cache the matched line numbers so the overall open-source ratio can be computed later
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());
//feature similarity against this file
BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
//assemble the open-source project info for this matched file
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setSourceUrl((String) openEntries.get("fullPath"))
.setFeatureSimilarity(featureSimilarity.floatValue())
.setOpenRate(openRateAndSaveRowNum.getKey())
.setVersion(versionInfo.getVersionName())
.setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
matchOpenFilesRes.add(matchOpenFileInfo);
}
return matchOpenFilesRes;
}
/**
 * The function feature library is incomplete, so double-check by matching
 * the whole file's MD5 against the open-source file base
 *
 * @param versionIdCoreName
 * @param originalFileMd5
*/
private void checkByOriginalFileMd5(String versionIdCoreName, String originalFileMd5) {
//query the version core by the file's MD5 to see whether the file exists in the open-source code base
SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + originalFileMd5, "versionId,fullPath,sourceFileMd5");
if (versionIdAndPath != null) {
//fetch the detailed version info by version ID
VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId"));
if (versionInfo != null) {
//open-source project info of the matched file
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setSourceUrl(versionInfo.getDownUrl())
.setFeatureSimilarity(100.00f)
.setOpenRate(100.00f)
.setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
//persist the file's open-source info to mongo
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName())
.setOpenRate(100.00f)
.setOpenType(analysisFile.getOpenType())
.setMatchOpenFile(Arrays.asList(matchOpenFileInfo));
mongoTemplate.save(matchOpenFileMongo);
}
}
}
/**
 * Parse the function feature values of the given open-source file
*
* @param matchOpenFile
* @return
*/
private List<Function> getOpenFileFunctionList(SolrDocument matchOpenFile) {
try {
//the feature JSON is stored escaped; strip the escaping before parsing
String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString();
lineFeatureMd5s = lineFeatureMd5s.replace("\\", "")
.replace("\"{", "{")
.replace("}\"", "}");
return JSONArray.parseArray(lineFeatureMd5s, Function.class);
}catch (Exception e){
log.error("Failed to parse the file's feature values", e);
}
return new ArrayList<Function>();
}
/**
 * Insert the feature values into mongo
 *
 * @param features feature list
 * @param lineDataMongoDto feature storage for the current analysis task
 * @param
 */
@Deprecated
private void insertFeatureValue(List<LineModel> features, LineDataMongoDto lineDataMongoDto) {
List<LineModel> batchInsertList = new ArrayList<>();
if (CollectionUtil.isNotEmpty(features)) {
//insert in batches of 10; very large parse results could exceed MongoDB's document size limit
int batchInsertStep = 10;
int total = 0;
for (int i = 0; i < features.size(); i++) {
LineModel lineModel = features.get(i);
if (total != batchInsertStep) {
batchInsertList.add(lineModel);
total++;
}
if (i == features.size() - 1 && total != batchInsertStep) {
total = 0;
lineDataMongoDto.setId(IdGenerator.uuid32())
.setLineModels(batchInsertList);
mongoTemplate.insert(lineDataMongoDto);
}
if (total == batchInsertStep) {
total = 0;
lineDataMongoDto.setId(IdGenerator.uuid32())
.setLineModels(batchInsertList);
mongoTemplate.insert(lineDataMongoDto);
batchInsertList.clear();
}
}
} else {
lineDataMongoDto.setId(IdGenerator.uuid32());
mongoTemplate.insert(lineDataMongoDto);
}
}
}

@@ -0,0 +1,10 @@
package com.keyware.composeanalysis.task;
/**
 * @author liuzongren
 * @date 2024/7/31
 * @description abstract base class for analysis tasks; implements Runnable so
 * concrete tasks can be submitted to the TaskExecutor
 */
public abstract class IAnalysisTask implements Runnable{
}

@@ -0,0 +1,298 @@
package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil;
import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst;
import com.keyware.composeanalysis.constant.RedisConst;
import com.keyware.composeanalysis.constant.SolrDBConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mongo.FileDataMongoDto;
import com.keyware.composeanalysis.mongo.LineDataMongoDto;
import com.keyware.composeanalysis.mongo.MatchOpenFile;
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.util.AnalysisLogUtil;
import com.keyware.composeanalysis.util.RedisUtil;
import com.keyware.composeanalysis.util.SolrUtils;
import com.keyware.composeanalysis.util.SpringContextUtils;
import com.keyware.keyswan.anaysis.Analysis;
import com.keyware.keyswan.anaysis.AnalysisFactory;
import com.keyware.keyswan.common.CodeFile;
import com.keyware.utils.IdGenerator;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Update;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;
import java.util.concurrent.CountDownLatch;
import static org.springframework.data.mongodb.core.query.Criteria.where;
/**
 * @author liuzongren
 * @ClassName LineAnalysisTask
 * @description: line-level provenance analysis task
 * @datetime 2024-07-25 16:19
 * @version: 1.0
 */
@Log4j2
public class LineAnalysisTask extends IAnalysisTask {
private MongoTemplate mongoTemplate;
private AnalysisTask analysisTask;
//file info of the artifact under test
private FileDataMongoDto analysisFile;
private SolrUtils solrUtils;
private RedisUtil redisUtil;
private CountDownLatch countDownLatch;
public LineAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
this.mongoTemplate = mongoTemplate;
this.analysisTask = analysisTask;
this.analysisFile = analysisFile;
this.countDownLatch = countDownLatch;
this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
}
/**
 * Line-level source provenance analysis.
 * This task must run after the file-level analysis has completed.
*/
@Override
public void run() {
//before executing, check whether the task has been stopped or paused
Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
log.info("Task cancelled, fileName:{}", analysisFile.getName());
countDownLatch.countDown();
return;
}
//file path
String filePath = analysisFile.getFileUrl();
//file name
String fileName = analysisFile.getName();
AnalysisLogUtil.insert(mongoTemplate, "[Line-level feature extraction] extracting " + fileName);
try {
LineDataMongoDto lineDataMongoDto = new LineDataMongoDto();
lineDataMongoDto.setFileId(analysisFile.getId())
.setStatus(0)
.setIsSelect(false);
Analysis analysis = AnalysisFactory.getAnalysis(filePath);
CodeFile codeFile = null;
//extract the file's line-level feature MD5s
codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT);
//MD5s of each line's feature content (the per-line source MD5s, cutFileLineMd5, are unused here)
String traitFileLineMd5 = codeFile.getTraitFileLineMd5();
String[] featureMd5Arr = {};
if (StringUtils.isNotBlank(traitFileLineMd5)) {
featureMd5Arr = traitFileLineMd5.split(",");
}
List<String> lineFeatures = Arrays.asList(featureMd5Arr);
//fetch feature-similar files from solr
SolrDocumentList featureSimilarityFromSolr = getFeatureSimilarityFromSolr(lineFeatures);
//compute the file's open-source ratio
calculateOpenRate(featureSimilarityFromSolr, lineFeatures);
//update the file's analysis status: line-level features fully analyzed
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.replaceWith(analysisFile)
.findAndReplace();
AnalysisLogUtil.insert(mongoTemplate, "[Line-level feature extraction] finished " + fileName);
log.info("File {}: line-level analysis finished", fileName);
} catch (Exception e) {
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "[Line-level feature extraction] failed " + fileName, e);
log.error("File " + fileName + ": line-level feature extraction failed!", e);
//mark the file's analysis status as failed
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("_id").is(analysisFile.getId()))
.apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
.first();
} finally {
countDownLatch.countDown();
}
}
/**
 * Compute the open-source ratio of the artifact under test
*
* @param matcheOpenSourceFiles
* @param lineFeatures
*/
private void calculateOpenRate(SolrDocumentList matcheOpenSourceFiles, List<String> lineFeatures) {
if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) {
return;
}
//pick the source-file version core to query based on the file suffix
String versionIdCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());
//result object
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
matchOpenFileMongo.setId(IdGenerator.uuid32())
.setFilePath(analysisFile.getFileUrl())
.setFileName(analysisFile.getName());
//result list of matched open-source file info
List<MatchOpenFile> matchOpenFileInfoList = new ArrayList<>();
//all matched line feature MD5s, used to compute the total matched line count
Set<String> matchingLineSet = new HashSet<>();
//total line count of the file
BigDecimal totalCodeRowNum = new BigDecimal(analysisFile.getCodeRowNum());
//count the matched lines between each open-source file and the artifact
for (SolrDocument matchFile : matcheOpenSourceFiles) {
//parse the open-source file's line feature values
String lineFeatureMd5s = (String) matchFile.get("tz_line_hay");
List<String> matchedLineFeatures = Arrays.asList(lineFeatureMd5s.split(","));
//total matched line count for this file
int currentFileMatchLineCount = 0;
//walk the artifact's line features and add up the matches
for (String originalLineFeatureMd5 : lineFeatures) {
for (String matchLineFeatureMd5 : matchedLineFeatures) {
if (originalLineFeatureMd5.equals(matchLineFeatureMd5)) {
currentFileMatchLineCount++;
matchingLineSet.add(originalLineFeatureMd5);
}
}
}
//look up the open-source file's version ID and path by its MD5
SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + matchFile.get("sourceMd5"), "versionId,fullPath,sourceFileMd5");
//fetch the detailed version info by version ID
//todo fetching one version's info hits two solr cores plus versionTree; needs optimizing later
VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId"));
//open-source ratio against this file
BigDecimal openRate = new BigDecimal(currentFileMatchLineCount).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
//open-source project info of the matched file
MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
matchOpenFileInfo.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setSourceUrl(versionInfo.getDownUrl())
.setOpenRate(openRate.floatValue())
.setVersion(versionInfo.getVersionName())
.setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.LINE_LEVEL.getCode());
matchOpenFileInfoList.add(matchOpenFileInfo);
}
//overall open-source ratio of the file
BigDecimal openRate = new BigDecimal(matchingLineSet.size()).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
//open-source ratio threshold
Integer openRateThreshold = analysisTask.getOpenRateThreshold();
//if the open-source ratio reaches the threshold, mark the file as open source
if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
analysisFile.setOpenType(true);
}
//persist the file's open-source info
matchOpenFileMongo.setOpenType(analysisFile.getOpenType())
.setMatchOpenFile(matchOpenFileInfoList);
mongoTemplate.save(matchOpenFileMongo);
}
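//note: matchingLineSet holds feature MD5s, so identical duplicate lines in the artifact
//are counted once against the total line count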
/**
 * Insert the feature values into mongo
 *
 * @param features feature string (comma-separated MD5s)
 * @param lineDataMongoDto feature storage for the current analysis task
 * todo check later whether this insert is needed at all
 * @param
 */
@Deprecated
private void insertFeatureValue(String features, LineDataMongoDto lineDataMongoDto) {
String[] featureMd5Arr = {};
if (StringUtils.isNotBlank(features)) {
featureMd5Arr = features.split(",");
}
List<String> lineFeatures = Arrays.asList(featureMd5Arr);
List<String> batchInsertList = new ArrayList<>();
if (CollectionUtil.isNotEmpty(lineFeatures)) {
//insert in batches of 5000; very large parse results could exceed MongoDB's document size limit
int batchInsertStep = 5000;
int total = 0;
for (int i = 0; i < lineFeatures.size(); i++) {
if (total != batchInsertStep) {
batchInsertList.add(lineFeatures.get(i));
total++;
}
if (i == lineFeatures.size() - 1 && total != batchInsertStep) {
total = 0;
lineDataMongoDto.setId(IdGenerator.uuid32())
.setLineFeatueMd5s(batchInsertList);
mongoTemplate.insert(lineDataMongoDto);
}
if (total == batchInsertStep) {
total = 0;
lineDataMongoDto.setId(IdGenerator.uuid32())
.setLineFeatueMd5s(batchInsertList);
mongoTemplate.insert(lineDataMongoDto);
batchInsertList.clear();
}
}
} else {
lineDataMongoDto.setId(IdGenerator.uuid32());
mongoTemplate.insert(lineDataMongoDto);
}
}
/**
 * Retrieve feature-similar files from the feature library by their feature MD5 values
 *
 * @param lineFeatureList line feature MD5s
* @return
*/
private SolrDocumentList getFeatureSimilarityFromSolr(List<String> lineFeatureList) {
String solrCoreName = SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP;
//assemble the line-feature query
String queryStr = "tz_line_hay:(" + StringUtils.join(lineFeatureList, " OR ") + ")";
log.info("Query: solrCoreName:{}, queryStr:{}", solrCoreName, queryStr);
SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,tz_line_hay");
log.info("Query result: {}", result);
return result;
}
}

@@ -0,0 +1,378 @@
package com.keyware.composeanalysis.task;
import cn.hutool.core.collection.CollectionUtil;
import com.google.common.collect.Sets;
import com.keyware.composeanalysis.constant.FixedValue;
import com.keyware.composeanalysis.constant.MongoDBConst;
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum;
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum;
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.mongo.*;
import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.solr.VersionTreeNode;
import com.keyware.composeanalysis.util.AnalysisLogUtil;
import com.keyware.composeanalysis.util.SolrUtils;
import com.keyware.composeanalysis.util.SpringContextUtils;
import com.keyware.utils.IdGenerator;
import com.mongodb.client.MongoClient;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.springframework.core.task.TaskExecutor;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Query;
import org.springframework.data.mongodb.core.query.Update;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.springframework.data.mongodb.core.query.Criteria.where;
/**
 * @author liuzongren
 * @date 2024/7/26
 * desc project-level provenance analysis task: first match all files at the project level,
 * then analyze any unmatched files at the finer-grained level.
 * Project-level matching requires file decompression to have completed.
 */
@Log4j2
public class PorjectAnalysisTask {
private MongoTemplate mongoTemplate;
private MongoTemplate keyswanDBTemplate;
private AnalysisTask analysisTask;
private AnalysisTaskServiceImpl analysisService;
private SolrUtils solrUtils;
private TaskExecutor taskExecutor;
/**
 * Project-level analysis
 *
 * @param mongoClient
 * @param analysisTask
 * @param solrUtils
 * @param analysisService
 */
public PorjectAnalysisTask(MongoClient mongoClient, AnalysisTask analysisTask, SolrUtils solrUtils, AnalysisTaskServiceImpl analysisService) {
this.analysisService = analysisService;
keyswanDBTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_KEYSWAN);
this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
this.analysisTask = analysisTask;
this.solrUtils = solrUtils;
this.taskExecutor = SpringContextUtils.getBean(TaskExecutor.class);
}
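//keyswanDBTemplate points at the shared open-source knowledge base,
//while mongoTemplate points at this task's own per-analysis database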
public void doAnalysis() {
try {
long startTime = System.currentTimeMillis();
//first try to match the artifact as a whole against the versionbasedata collection
Boolean matchedProject = matchByProjectMd5();
//if the project as a whole found no match in versionbase, match all of its files against the solr versionTree
if (!matchedProject) {
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5();
//for files still unmatched, match their MD5s against the solr versionTree
if (CollectionUtils.isNotEmpty(unMatchedFiles)) {
matchByFileMd5s(unMatchedFiles);
}
}
//todo if the overall run is slow, make the persistence of matchOpenFileInfo asynchronous
log.info("Project-level analysis finished, took {}s", (System.currentTimeMillis() - startTime) / 1000);
} catch (Exception e) {
AnalysisLogUtil.insert(mongoTemplate, "Composition analysis failed: " + e.getMessage());
log.error("Project-level analysis failed, project name: " + analysisTask.getFileName(), e);
analysisTask.setAnalysisStatus(AnalysisStatusEnum.FAIL_ANALYSIS.getCode());
analysisService.updateById(analysisTask);
}
}
//match the project as a whole to check whether the entire project is open source
private Boolean matchByProjectMd5() {
//match the project's MD5 against the version base
Query versionBaseQuery = new Query(where("MD5").is(analysisTask.getMd5()));
VersionbasedataMongoDto openSourceProject = keyswanDBTemplate.findOne(versionBaseQuery, VersionbasedataMongoDto.class);
//if an open-source project matched, mark the task as open source and persist the result
if (openSourceProject != null) {
analysisTask.setOpenType(true);
//store the matched open-source project info as this artifact's open-source info
ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto();
projectAssembly.setId(IdGenerator.uuid32())
.setFileCount(analysisTask.getFileCount())
.setMatchFileCount(analysisTask.getFileCount())
.setProjectId(openSourceProject.getProjectId())
.setVersionId(openSourceProject.getVersionId())
.setVersionName(openSourceProject.getVersionName())
.setSemblance(100.00d)
.setOpenSourceUrl(openSourceProject.getDownloadUrl());
//look up the project name and license info for the matched version
Query projectBaseQuery = new Query(where("ID").is(openSourceProject.getProjectId()));
ProjectBaseDataMongoDto projectbasedata = keyswanDBTemplate.findOne(projectBaseQuery, ProjectBaseDataMongoDto.class);
if (projectbasedata != null) {
projectAssembly.setProjectName(projectbasedata.getName());
if (StringUtils.isNotEmpty(projectbasedata.getLicenseType())) {
projectAssembly.setLicenseType(Arrays.asList(projectbasedata.getLicenseType()));
}
}
//persist the artifact's open-source info
mongoTemplate.insert(projectAssembly);
analysisService.updateById(analysisTask);
//update the analysis status of every file
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("isDirectory").is(false))
.apply(new Update().set("openType", true)
.set("openRate", 100.00d)
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
.all();
//persist the per-file open-source details
VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId());
Query fileQuery = new Query(where("isDirectory").is(false));
List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class);
saveProjectOpenInfo(openProjectList, fileDataMongoDtos);
return true;
}
return false;
}
//通过文件的md5 去匹配开源项目
private List<FileDataMongoDto> matchByAllFilesMd5() {
//检索当前项目的所有文件
Query query = new Query(where("isDirectory").is(false));
List<FileDataMongoDto> projectFiles = mongoTemplate.query(FileDataMongoDto.class).matching(query).all();
//Match all files against the solr library in several passes
Set<String> matchedFileMd5Set = multipleMatchByAllFilesMd5(projectFiles);
//Collect the files that remain unmatched
return projectFiles.stream().filter(file -> !matchedFileMd5Set.contains(file.getMd5())).collect(Collectors.toList());
}
//Match files against the feature library by md5
private void matchByFileMd5s(List<FileDataMongoDto> unMatchedFiles) {
//Group files by suffix so each solr core is queried once
Map<String, List<FileDataMongoDto>> allSuffixFiles = unMatchedFiles.stream().filter(file -> StringUtils.isNotEmpty(file.getSuffix())).collect(Collectors.groupingBy(FileDataMongoDto::getSuffix));
//Files in languages without a dedicated feature core
List<FileDataMongoDto> otherLanguageFiles = new ArrayList<>();
//Iterate over the 32 mainstream languages
allSuffixFiles.forEach((suffix, data) -> {
//Resolve the feature core name from the file suffix
if (FixedValue.SUFFIX_SOLR_VERSION.containsKey(suffix)) {
String currentCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(suffix);
//Look up version ids in *_SourceFileBase by md5
Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s);
if (CollectionUtil.isEmpty(md5VersionObjMap)) {
return;
}
//Save the matched results
saveMatchOpenFileInfo(md5VersionObjMap, data);
} else {
//Non-mainstream languages have no dedicated feature core; search them in the default core
otherLanguageFiles.addAll(data);
}
});
//Treat files without a suffix the same as the non-mainstream-language files
List<FileDataMongoDto> noSuffixFiles = unMatchedFiles.stream().parallel().filter(file -> StringUtils.isEmpty(file.getSuffix())).collect(Collectors.toList());
otherLanguageFiles.addAll(noSuffixFiles);
if (CollectionUtils.isNotEmpty(otherLanguageFiles)) {
//Non-mainstream-language files carry 2 kinds of MD5;
//for now skip matching the character-stream md5, since it is usually identical to the source md5
Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
//If nothing matched, mark these files as analyzed: non-mainstream-language files cannot be parsed, so if the source md5 does not match there is nothing further to try
updateFileAnalysisStatus(fileMd5s);
return;
}
saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);
//Mark the files that did not match as analyzed
updateFileAnalysisStatus(Sets.difference(fileMd5s, md5VersionIdMap.keySet()));
}
}
//After matching open source files, enrich the details and save them to mongo
private void saveMatchOpenFileInfo(Map<String, SolrDocument> md5VersionIdMap, List<FileDataMongoDto> originalFiles) {
List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
//Look up detailed version info by version id
//todo If this turns out to be slow, the version enrichment can be done asynchronously
Set<String> versionIds = md5VersionIdMap.values().stream().map(doc->(String)doc.get("versionId")).collect(Collectors.toSet());
List<VersionTree> versionInfos = solrUtils.queryBatchVersionInfoByVersionIds(versionIds);
Map<String, VersionTree> versionTreeMap = versionInfos.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity(), (key1, key2) -> key1));
Map<String, FileDataMongoDto> fileMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
md5VersionIdMap.forEach((fileMd5, versionObj) -> {
String versionId = (String) versionObj.get("versionId");
VersionTree versionInfo = versionTreeMap.get(versionId);
if (versionInfo == null) {
log.error("versionId not found in versionTree, fileMd5: {}, versionId: {}", fileMd5, versionId);
return;
}
FileDataMongoDto fileDataMongoDto = fileMd5ObjMap.get(fileMd5);
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString());
batchInsertCache.add(matchOpenFile);
});
if (CollectionUtils.isNotEmpty(batchInsertCache)) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
//Update the per-file analysis status
updateFileAnalysisStatus(md5VersionIdMap.keySet());
}
}
//After a whole project is matched, save the open source info of each file
private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles) {
Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
Set<String> matchedMd5s = new HashSet<>();
List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
List<VersionTreeNode> fileInfos = versionInfo.getDirTree();
fileInfos.forEach(versionTreeNodeObj->{
String openFileMd5 = versionTreeNodeObj.getSourceFileMd5();
//Check whether it matches an md5 of the project under test
if (originalMd5ObjMap.containsKey(openFileMd5)) {
//Save each matched file only once
if (!matchedMd5s.contains(openFileMd5)) {
MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5),versionTreeNodeObj.getFullPath());
batchInsertCache.add(matchOpenFile);
matchedMd5s.add(openFileMd5);
}
}
//Insert in batches so a huge project neither exhausts memory nor exceeds mongodb's max batch size
if (batchInsertCache.size() >= 1000) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
batchInsertCache.clear();
}
});
if (!batchInsertCache.isEmpty()) {
mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
}
//Update the per-file analysis status
updateFileAnalysisStatus(matchedMd5s);
}
//Build the record for a matched open source file
private MatchOpenFileMongoDto getMatchOpenFile(VersionTree versionInfo, FileDataMongoDto originalFile, String openFilePath) {
//Fill in the matched open source file's info
MatchOpenFile matchOpenFile = new MatchOpenFile();
matchOpenFile.setId(IdGenerator.uuid32())
.setVersionId(versionInfo.getVersionId())
.setSourceFilePath(openFilePath)
.setSourceUrl(versionInfo.getDownUrl())
.setPId(versionInfo.getProId())
.setPName(versionInfo.getProName())
.setLicenseType(versionInfo.getLicenseType())
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode())
.setVersion(versionInfo.getVersionName())
.setFeatureSimilarity(100.00f)
.setOpenRate(100.00f);
//Build the match record between the current file and the open source code
MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto();
matchOpenFileInfo.setId(IdGenerator.uuid32())
.setFileName(originalFile.getName())
.setFilePath(originalFile.getFileUrl())
.setOpenType(originalFile.getOpenType())
.setFeatureSimilarity(100.00f)
.setOpenRate(100.00f)
.setMatchOpenFile(Arrays.asList(matchOpenFile));
return matchOpenFileInfo;
}
//Match all file md5s against versionTree in several passes; a single query with many result sets would block solr for a long time
//Each pass matches only the files the previous pass left unmatched
//todo A threshold is needed here: cap the number of passes, or stop the whole-project matching once the similarity is high enough
//Currently defaults to five passes
private Set<String> multipleMatchByAllFilesMd5(List<FileDataMongoDto> projectFiles) {
//Collect the md5s of all files of the project under test
Set<String> projectFilesMd5 = projectFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
//md5s matched so far
Set<String> matchedFileMd5Set = new HashSet<>();
//md5s still unmatched
Set<String> unMatchedFileMd5s = projectFilesMd5;
//Run up to five passes of whole-project matching
for (int i = 0; i < 5; i++){
//Search the versionTree core
String queryStr = "dirTree:(" + StringUtils.join(unMatchedFileMd5s, " OR ") + ")";
log.info("versionTree queryStr: " + queryStr);
long startTime = System.currentTimeMillis();
VersionTree openProject = solrUtils.queryVersionTree(queryStr);
log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000);
//If no open source project is matched, exit the loop
if (openProject == null){
break;
}
//Save the matched open source file info asynchronously
taskExecutor.execute(() -> saveProjectOpenInfo(openProject, projectFiles));
//Collect the md5s of all files of the matched open source project (a set, for O(1) lookups)
Set<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toSet());
//Files shared by the project under test and the open source project
Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(openFilesMd5::contains).collect(Collectors.toSet());
//Record the matched md5s; they are needed later for the overall open source rate
matchedFileMd5Set.addAll(matchedFiles);
//Compute the similarity with the current project
BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));
//If the similarity is below 30%, do not save project-level info
if (semblance.compareTo(new BigDecimal(30)) < 0){
break;
}
ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto();
projectAssembly.setId(IdGenerator.uuid32())
.setFileCount(openFilesMd5.size())
.setMatchFileCount(matchedFiles.size())
.setProjectId(openProject.getProId())
.setProjectName(openProject.getProName())
.setVersionName(openProject.getVersionName())
.setOpenSourceUrl(openProject.getDownUrl())
.setSemblance(semblance.doubleValue());
mongoTemplate.insert(projectAssembly);
//Remove the matched md5s so the next pass only queries the remainder
unMatchedFileMd5s = Sets.difference(unMatchedFileMd5s, matchedFiles);
//If no unmatched files remain, stop the whole-project matching
if (CollectionUtils.isEmpty(unMatchedFileMd5s) ) {
break;
}
}
return matchedFileMd5Set;
}
//Update the per-file analysis status
private void updateFileAnalysisStatus(Set<String> fileMd5Set) {
mongoTemplate.update(FileDataMongoDto.class)
.matching(where("md5").in(fileMd5Set))
.apply(new Update().set("openType", true)
.set("openRate", 100.00f)
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
.all();
}
}

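A minimal driving sketch for the task above (illustrative, not part of this commit): the collaborators are assumed to be available from the Spring context, and the MyBatis-Plus getById lookup and the task id are hypothetical.

import com.keyware.composeanalysis.entity.AnalysisTask;
import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl;
import com.keyware.composeanalysis.util.SolrUtils;
import com.keyware.composeanalysis.util.SpringContextUtils;
import com.mongodb.client.MongoClient;

public class ProjectAnalysisDemo {
    public static void main(String[] args) {
        // All beans fetched via SpringContextUtils, the same way the task resolves its TaskExecutor.
        MongoClient mongoClient = SpringContextUtils.getBean(MongoClient.class);
        SolrUtils solrUtils = SpringContextUtils.getBean(SolrUtils.class);
        AnalysisTaskServiceImpl taskService = SpringContextUtils.getBean(AnalysisTaskServiceImpl.class);
        AnalysisTask task = taskService.getById("task-id"); // hypothetical task id
        // Runs the whole-project md5 match first, then falls back to per-file matching.
        new PorjectAnalysisTask(mongoClient, task, solrUtils, taskService).doAnalysis();
    }
}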
@ -0,0 +1,33 @@
package com.keyware.composeanalysis.util;
import cn.hutool.core.date.DateTime;
import com.keyware.composeanalysis.mongo.AnalysisLogMongoDto;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.scheduling.annotation.Async;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
* @author liuzongren
* @date 2024/7/30
* @description analysis log helper
*/
public class AnalysisLogUtil {
public static void insert(MongoTemplate mongoTemplate,String logInfo) {
mongoTemplate.insert(new AnalysisLogMongoDto().setLogInfo(logInfo).setCreateTime(new DateTime()));
}
public static void insertErrorInfo(MongoTemplate mongoTemplate, String logInfo, Exception e) {
mongoTemplate.insert(new AnalysisLogMongoDto().setLogInfo(logInfo + getErrorMsg(e)).setCreateTime(new DateTime()));
}
private static String getErrorMsg(Exception e) {
StringWriter errors = new StringWriter();
e.printStackTrace(new PrintWriter(errors));
return errors.toString();
}
}

@ -0,0 +1,32 @@
package com.keyware.composeanalysis.util;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.keyware.composeanalysis.solr.VersionTree;
import com.keyware.composeanalysis.solr.VersionTreeNode;
import org.apache.solr.common.SolrDocument;
import java.util.List;
/**
* @author liuzongren
* @date 2024/8/7
* @description SolrDocument-to-entity conversion helper
*/
public class BeanUtil {
public static VersionTree domToVersionTree(SolrDocument dom) {
//dirTree is stored as an escaped JSON-array string; strip the escaping before parsing
String dirTree = String.valueOf(dom.get("dirTree"));
dirTree = dirTree.replace("\\", "");
dirTree = dirTree.replace("\"{", "{");
dirTree = dirTree.replace("}\"", "}");
//clear the raw field so it is not serialized into the VersionTree below
dom.put("dirTree", null);
JSONArray treeArray = JSON.parseArray(dirTree);
List<VersionTreeNode> treeList = treeArray.toJavaList(VersionTreeNode.class);
String domObj = JSON.toJSONString(dom);
VersionTree versionTree = JSON.parseObject(domObj, VersionTree.class);
versionTree.setDirTree(treeList);
return versionTree;
}
}

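A small round-trip sketch for domToVersionTree (illustrative; the dirTree payload shape is an assumption inferred from the un-escaping above):

import com.keyware.composeanalysis.solr.VersionTree;
import org.apache.solr.common.SolrDocument;

public class BeanUtilDemo {
    public static void main(String[] args) {
        SolrDocument dom = new SolrDocument();
        dom.setField("versionId", "v-1");
        dom.setField("proName", "demo-project");
        // dirTree arrives from solr as a JSON-array string
        dom.setField("dirTree", "[{\"sourceFileMd5\":\"abc\",\"fullPath\":\"src/A.java\"}]");
        VersionTree tree = BeanUtil.domToVersionTree(dom);
        System.out.println(tree.getDirTree().size()); // 1
    }
}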
@ -0,0 +1,38 @@
package com.keyware.composeanalysis.util;
import com.alibaba.fastjson.JSON;
import com.mongodb.BasicDBObject;
import org.bson.Document;
import org.bson.json.JsonWriterSettings;
/**
* @author liuzongren
* @date 2024/7/24
* Type conversion utilities
*/
public class ConvertUtil {
public static <T> T documentToBean(BasicDBObject dbObject, Class<T> clzss) {
String realJson = dbObject.toJson(JsonWriterSettings.builder().build());
T obj = JSON.parseObject(realJson, clzss);
return obj;
}
public static <T> T documentToBean(Document document, Class<T> clzss) {
String realJson = document.toJson(JsonWriterSettings.builder().build());
T obj = JSON.parseObject(realJson, clzss);
return obj;
}
public static <T> BasicDBObject toDBObject(T object) {
String json = JSON.toJSONString(object);
BasicDBObject basicDBObject = BasicDBObject.parse(json);
return basicDBObject;
}
public static <T> Document beanToDocument(T object) {
String json = JSON.toJSONString(object);
Document document = Document.parse(json);
return document;
}
}

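A round-trip usage sketch for the converters above (the Sample bean and its field are illustrative):

import org.bson.Document;

public class ConvertUtilDemo {
    // Minimal bean for the round trip; fastjson needs a no-args constructor and accessors.
    public static class Sample {
        private String name;
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
    }

    public static void main(String[] args) {
        Sample in = new Sample();
        in.setName("demo");
        Document doc = ConvertUtil.beanToDocument(in);          // {"name": "demo"}
        Sample out = ConvertUtil.documentToBean(doc, Sample.class);
        System.out.println(out.getName());                      // demo
    }
}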
@ -0,0 +1,23 @@
package com.keyware.composeanalysis.util;
import lombok.extern.log4j.Log4j2;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
* @author liuzongren
* @date 2024/7/30
*/
@Log4j2
public class IpUtil {
public static String getHostIp() {
try {
return InetAddress.getLocalHost().getHostAddress();
} catch (UnknownHostException e) {
log.error(e.getMessage(), e);
}
return "127.0.0.1";
}
}

@ -0,0 +1,537 @@
package com.keyware.composeanalysis.util;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
/**
* @author liuzongren
* @date 2024/04/02
*/
@Component
public class RedisUtil {
@Autowired
private RedisTemplate<String, Object> redisTemplate;
/**
* Set an expiry (in seconds) on the given key
*
* @param key
* @param time
* @return
*/
public boolean expire(String key, long time) {
return this.redisTemplate.expire(key, time, TimeUnit.SECONDS);
}
/**
* Get the remaining time to live of a key, in seconds
*
* @param key
* @return
*/
public long getTime(String key) {
return redisTemplate.getExpire(key, TimeUnit.SECONDS);
}
/**
* Check whether a key exists
*
* @param key
* @return
*/
public boolean hasKey(String key) {
return redisTemplate.hasKey(key);
}
/**
* Remove the expiry of the given key
*
* @param key
* @return
*/
public boolean persist(String key) {
return redisTemplate.boundValueOps(key).persist();
}
//- - - - - - - - - - - - - - - - - - - - - String operations - - - - - - - - - - - - - - - - - - - -
/**
* Get the value of a key
*
* @param key
* @return
*/
public Object get(String key) {
return key == null ? null : redisTemplate.opsForValue().get(key);
}
/**
* Put a value into the cache
*
* @param key
* @param value
*/
public void set(String key, Object value) {
redisTemplate.opsForValue().set(key, value);
}
/**
* Put a value into the cache with an expiry
*
* @param key
* @param value
* @param time expiry in seconds; values <= 0 mean no expiry
*/
public void set(String key, String value, long time) {
if (time > 0) {
redisTemplate.opsForValue().set(key, value, time, TimeUnit.SECONDS);
} else {
redisTemplate.opsForValue().set(key, value);
}
}
public void delKey(String key) {
redisTemplate.delete(key);
}
/**
* Batch-set keys (existing keys are overwritten)
*
* @param keyAndValue
*/
public void batchSet(Map<String, String> keyAndValue) {
redisTemplate.opsForValue().multiSet(keyAndValue);
}
/**
* Batch-set key-value pairs only if none of the keys exist;
* if any key in the map already exists, nothing is added
*
* @param keyAndValue
*/
public void batchSetIfAbsent(Map<String, String> keyAndValue) {
redisTemplate.opsForValue().multiSetIfAbsent(keyAndValue);
}
/**
* Increment (or decrement) the value of a key.
* If the key does not exist, it is created and set to the given number;
* if the key exists but its value is not an integer, an error is raised.
*
* @param key
* @param number
*/
public Long increment(String key, long number) {
return redisTemplate.opsForValue().increment(key, number);
}
/**
* Increment (or decrement) the value of a key.
* If the key does not exist, it is created and set to the given number;
* if the key exists but its value is not numeric, an error is raised.
*
* @param key
* @param number
*/
public Double increment(String key, double number) {
return redisTemplate.opsForValue().increment(key, number);
}
//- - - - - - - - - - - - - - - - - - - - - set operations - - - - - - - - - - - - - - - - - - - -
/**
* Add a value to a set
*
* @param key
* @param value
* @return
*/
public void sSet(String key, String value) {
redisTemplate.opsForSet().add(key, value);
}
/**
* Get all members of a set
*
* @param key
* @return
*/
public Set<Object> members(String key) {
return redisTemplate.opsForSet().members(key);
}
/**
* Randomly get the given number of elements from a set
*
* @param key
* @param count
* @return
*/
public List<Object> randomMembers(String key, long count) {
return redisTemplate.opsForSet().randomMembers(key, count);
}
/**
* Randomly get one element from a set
*
* @param key
* @return
*/
public Object randomMember(String key) {
return redisTemplate.opsForSet().randomMember(key);
}
/**
* Pop an element from a set
*
* @param key
* @return
*/
public Object pop(String key) {
return redisTemplate.opsForSet().pop(key);
}
/**
* Get the size of a set
*
* @param key
* @return
*/
public long size(String key) {
return redisTemplate.opsForSet().size(key);
}
/**
* Check whether a value exists in a set
*
* @param key
* @param value
* @return true if present, false otherwise
*/
public boolean sHasKey(String key, Object value) {
return redisTemplate.opsForSet().isMember(key, value);
}
/**
* Check whether the given element is in the set
*
* @param key
* @param obj the element
* @return
*/
public boolean isMember(String key, Object obj) {
return redisTemplate.opsForSet().isMember(key, obj);
}
/**
* Move an element from one set to another
*
* @param key
* @param value the element
* @param destKey the destination set
* @return
*/
public boolean move(String key, String value, String destKey) {
return redisTemplate.opsForSet().move(key, value, destKey);
}
/**
* Batch-remove elements from a set
*
* @param key
* @param values
* @return
*/
public void remove(String key, Object... values) {
redisTemplate.opsForSet().remove(key, values);
}
/**
* Get the difference of two sets
*
* @param key
* @param destKey
* @return
*/
public Set<Object> difference(String key, String destKey) {
return redisTemplate.opsForSet().difference(key, destKey);
}
//- - - - - - - - - - - - - - - - - - - - - hash operations - - - - - - - - - - - - - - - - - - - -
/**
* Put a whole map into a hash
*
* @param key
* @param map
* @return
*/
public void add(String key, Map<String, String> map) {
redisTemplate.opsForHash().putAll(key, map);
}
/**
* Get all hashKey-value pairs under a key
*
* @param key
* @return
*/
public Map<Object, Object> getHashEntries(String key) {
return redisTemplate.opsForHash().entries(key);
}
/**
* Check whether the hash at key contains the given hashKey
*
* @param key
* @param hashKey
* @return
*/
public boolean hashKey(String key, String hashKey) {
return redisTemplate.opsForHash().hasKey(key, hashKey);
}
/**
* Get a hash field as a String
*
* @param key
* @param key2
* @return
*/
public String getMapString(String key, String key2) {
return redisTemplate.opsForHash().get(key, key2).toString();
}
/**
* Get a hash field as an Integer
*
* @param key
* @param key2
* @return
*/
public Integer getMapInt(String key, String key2) {
return (Integer) redisTemplate.opsForHash().get(key, key2);
}
/**
* Pop an element from a set and return it as a String
*
* @param key
* @return
*/
public String popValue(String key) {
return String.valueOf(redisTemplate.opsForSet().pop(key));
}
/**
* Delete the given hashKeys from a hash
*
* @param key
* @param hashKeys
* @return the number of fields removed
*/
public Long delete(String key, String... hashKeys) {
return redisTemplate.opsForHash().delete(key, hashKeys);
}
/**
* Increment (or decrement) a hash field
*
* @param key
* @param hashKey
* @param number
* @return
*/
public Long increment(String key, String hashKey, long number) {
return redisTemplate.opsForHash().increment(key, hashKey, number);
}
/**
* Increment (or decrement) a hash field
*
* @param key
* @param hashKey
* @param number
* @return
*/
public Double increment(String key, String hashKey, Double number) {
return redisTemplate.opsForHash().increment(key, hashKey, number);
}
/**
* Get all hashKeys under a key
*
* @param key
* @return
*/
public Set<Object> hashKeys(String key) {
return redisTemplate.opsForHash().keys(key);
}
/**
* Get the number of key-value pairs in a hash
*
* @param key
* @return
*/
public Long hashSize(String key) {
return redisTemplate.opsForHash().size(key);
}
//- - - - - - - - - - - - - - - - - - - - - list operations - - - - - - - - - - - - - - - - - - - -
/**
* Prepend an element to the left end of a list
*
* @param key
* @param value
* @return
*/
public void leftPush(String key, Object value) {
redisTemplate.opsForList().leftPush(key, value);
}
/**
* Get the element at the given index of a list
*
* @param key
* @param index
* @return
*/
public Object index(String key, long index) {
return redisTemplate.opsForList().index(key, index);
}
/**
* Get the elements in the given range of a list
*
* @param key
* @param start
* @param end
* @return
*/
public List<Object> range(String key, long start, long end) {
return redisTemplate.opsForList().range(key, start, end);
}
/**
* Insert the value before the first occurrence of the pivot,
* if the pivot exists in the list
*
* @param key
* @param pivot
* @param value
* @return
*/
public void leftPush(String key, String pivot, String value) {
redisTemplate.opsForList().leftPush(key, pivot, value);
}
/**
* Batch-prepend elements to the left end of a list
*
* @param key
* @param values
* @return
*/
public void leftPushAll(String key, String... values) {
redisTemplate.opsForList().leftPushAll(key, values);
}
/**
* Append an element to the right end of a list
*
* @param key
* @param value
* @return
*/
public void rightPush(String key, String value) {
redisTemplate.opsForList().rightPush(key, value);
}
/**
* Batch-append elements to the right end of a list
*
* @param key
* @param values
* @return
*/
public void rightPushAll(String key, String... values) {
redisTemplate.opsForList().rightPushAll(key, values);
}
/**
* Append an element only if the list already exists
*
* @param key
* @param value
* @return
*/
public void rightPushIfPresent(String key, Object value) {
redisTemplate.opsForList().rightPushIfPresent(key, value);
}
/**
* Get the length of a list
*
* @param key
* @return
*/
public long listLength(String key) {
return redisTemplate.opsForList().size(key);
}
/**
* Pop the leftmost element of a list
*
* @param key
* @return
*/
public void leftPop(String key) {
redisTemplate.opsForList().leftPop(key);
}
/**
* Pop the leftmost element, blocking up to the given timeout;
* returns when an element arrives or the timeout elapses
*
* @param key
* @return
*/
public void leftPop(String key, long timeout, TimeUnit unit) {
redisTemplate.opsForList().leftPop(key, timeout, unit);
}
/**
* Pop the rightmost element of a list
*
* @param key
* @return
*/
public void rightPop(String key) {
redisTemplate.opsForList().rightPop(key);
}
/**
* Pop the rightmost element, blocking up to the given timeout;
* returns when an element arrives or the timeout elapses
*
* @param key
* @return
*/
public void rightPop(String key, long timeout, TimeUnit unit) {
redisTemplate.opsForList().rightPop(key, timeout, unit);
}
}

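Typical injection-site usage of the helper above (a sketch; assumes a serializer-configured RedisTemplate bean exists, and the key naming is illustrative):

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

@Service
public class AnalysisCacheDemo {
    @Autowired
    private RedisUtil redisUtil;

    public void rememberTaskStatus(String taskId, String status) {
        // cache the status for one hour
        redisUtil.set("task:status:" + taskId, status, 3600);
    }

    public Object readTaskStatus(String taskId) {
        return redisUtil.get("task:status:" + taskId);
    }
}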
@ -0,0 +1,206 @@
package com.keyware.composeanalysis.util;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.util.StrUtil;
import io.micrometer.common.util.StringUtils;
import org.apache.commons.collections.CollectionUtils;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
public class SimilarityUtil {
private SimilarityUtil() {
}
/**
* Get the similarity of two files
* @param sentence1 content of the file under analysis
* @param sentence2 content of the open source file
* @return
*/
public static double getSimilarityMe(String sentence1, String sentence2) {
//lines of the file under analysis
List<String> sent1Words = getSplitWords(sentence1);
if (sent1Words.isEmpty()) {
return 0.00;
}
//lines of the matched open source file
HashSet<String> sent2Words = getSplitWords1(sentence2);
//number of matched lines
double count = 0;
for (String sent1Word : sent1Words) {
if (sent2Words.contains(sent1Word)) {
count++;
}
}
return count / sent1Words.size();
}
/**
* Get the open source rate and the matched row numbers
* @param analysisFile content of the file under analysis
* @param openSourceFile content of the open source file
* @return
*/
public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) {
if (StrUtil.hasBlank(analysisFile,openSourceFile)){
return new Pair<>(0.00f,new HashSet<>());
}
//matched row numbers
HashSet<Integer> matchedRowsNum = new HashSet<>();
//lines of the file under analysis
List<String> analysisFileLineInfo = getSplitWords(analysisFile);
//lines of the matched open source file
HashSet<String> openSourceFileLineInfo = getSplitWords1(openSourceFile);
for (int i = 0; i < analysisFileLineInfo.size(); i++) {
String sent1Word = analysisFileLineInfo.get(i);
if (openSourceFileLineInfo.contains(sent1Word)) {
matchedRowsNum.add(i);
}
}
//compute the open source rate
BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
return new Pair<>(openRate.floatValue(), matchedRowsNum);
}
/**
* Get the similarity of two files and record the matched rows
* @param matchLineInfos lines to be matched
* @param sentence2 content of the open source file
* @param matchRows receives the indexes of the matched rows
* @return
*/
public static double getSimilarityAndSaveRowNum(List<String> matchLineInfos, String sentence2, HashSet<Integer> matchRows) {
if (CollectionUtils.isEmpty(matchLineInfos)) {
return 0.00d;
}
//lines of the matched open source file
HashSet<String> sent2Words = getSplitWords1(sentence2);
//number of matched lines
double count = 0d;
for (int i = 0; i < matchLineInfos.size(); i++) {
String lineContents = matchLineInfos.get(i);
if (sent2Words.contains(lineContents)) {
//record the index of the matched row
matchRows.add(i);
count++;
}
}
return count / matchLineInfos.size();
}
public static List<String> getSplitWords(String sentence) {
List<String> lineList = new ArrayList<String>();
if (StringUtils.isBlank(sentence)){
return lineList;
}
sentence = sentence.replaceAll("\n\r", "\n").replaceAll("\r\n", "\n").replaceAll("\r", "\n");
List<String> list = Arrays.asList(sentence.split("\n"));
for (String string : list) {
if (string != null && !"".equals(string.trim())) {
lineList.add(string.replaceAll(" ",""));
}
}
return lineList;
}
//Same as getSplitWords, but returns a de-duplicated set for O(1) lookups
private static HashSet<String> getSplitWords1(String sentence) {
HashSet<String> set = new HashSet<>();
sentence = sentence.replaceAll("\n\r", "\n").replaceAll("\r\n", "\n").replaceAll("\r", "\n");
List<String> list = Arrays.asList(sentence.split("\n"));
for (String string : list) {
if (string != null && !"".equals(string.trim())) {
set.add(string.replaceAll(" ",""));
}
}
return set;
}
}

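A worked example of the line-based open-rate calculation above (the file contents are made up): two of the three non-blank lines match, so the open rate is 66.67 and rows 0 and 2 are recorded.

import cn.hutool.core.lang.Pair;
import java.util.HashSet;

public class SimilarityDemo {
    public static void main(String[] args) {
        String analysisFile = "int a = 1;\nint b = 2;\nint c = 3;";
        String openSourceFile = "int a = 1;\nint c = 3;";
        Pair<Float, HashSet<Integer>> result =
                SimilarityUtil.getOpenRateAndSaveRowNum(analysisFile, openSourceFile);
        System.out.println(result.getKey());   // 66.67 = round(2/3, 4 places) * 100
        System.out.println(result.getValue()); // matched row indexes 0 and 2
    }
}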
@ -0,0 +1,321 @@
package com.keyware.composeanalysis.util;
import com.keyware.composeanalysis.constant.MongoDBConst;
import com.keyware.composeanalysis.solr.VersionTree;
import lombok.Data;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.*;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
* NOTE: the configured solr base URL (solr.solrUrl) is used as-is, so it must end
* with a separator that core names can be appended to directly
*
* @author liuzongren
*/
@Log4j2
@Component
@Data
public class SolrUtils {
@Value("${solr.solrUrl}")
private String clientUrl;
@Value("${solr.row}")
private String ROWS;
//path where source archives are uploaded and unpacked
@Value("${codeResourcePath}")
private String codeResourcePath;
private String fileAndFunSolrUrl;
//cache of per-core clients
private Map<String, HttpSolrClient> coreClientMap = new ConcurrentHashMap<>();
/**
* @param coreName the solr core (collection) name
* @return a cached client for the core
* @describe Get a solr client; todo verify whether the client handles concurrent use, and tune if needed
*/
public HttpSolrClient getClient(String coreName) {
return coreClientMap.computeIfAbsent(coreName, core -> new HttpSolrClient.Builder(clientUrl + core)
.withConnectionTimeout(6000000)
.withSocketTimeout(6000000)
.allowCompression(true)
.build());
}
/**
* Simple query with the returned fields specified
*
* @param searchContent query string
* @param returnFields fields to return
* @return
*/
public SolrDocumentList query(String coreName, String searchContent, String returnFields) {
SolrDocumentList docsList = null;
try {
HttpSolrClient client = getClient(coreName);
Map<String, String> map = new HashMap<String, String>();
map.put(CommonParams.Q, searchContent);
map.put(CommonParams.FL, returnFields);
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, ROWS);
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
if (!query.getResults().isEmpty()) {
docsList = query.getResults();
}
} catch (SolrServerException | IOException e) {
log.error("solr query failed, coreName:{}, queryStr:{}", coreName, searchContent, e);
}
return docsList;
}
/**
* Look up the version id of each file in *_SourceFileBase by source file MD5
* @param coreName solr core name
* @param originalFileMd5s md5s of the files to look up
* todo 1. Edge case: if very many files are queried, the response size has not been verified
* todo 2. dirTreeId is not selected here; the next step only resolves version info from versionTree, not the file's exact node
* @return
*/
public Map<String, SolrDocument> batchQueryVersionIdFromSourceFileBaseBySourceMd5(String coreName, Set<String> originalFileMd5s) {
String queryStr = "sourceFileMd5:(" + StringUtils.join(originalFileMd5s, " OR ") + ")";
Map<String, SolrDocument> openFileMd5VersionIdMap = new HashMap<>();
long startTime = System.currentTimeMillis();
log.info("batchQueryVersionIdFromSourceFileBaseBySourceMd5 queryStr:{},size:{}", queryStr, originalFileMd5s.size());
try {
HttpSolrClient client = getClient(coreName);
Map<String, String> map = new HashMap<>();
map.put(CommonParams.Q, queryStr);
map.put(CommonParams.FL, "sourceFileMd5,versionId,fullPath");
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, String.valueOf(originalFileMd5s.size()));
//group the results: one hit per open source file is enough
//todo grouping still scans many documents and returns match counts; check whether a single-match query is possible
map.put(GroupParams.GROUP, "true");
map.put(GroupParams.GROUP_FIELD, "sourceFileMd5");
map.put(GroupParams.GROUP_LIMIT, "1");
map.put(GroupParams.GROUP_FORMAT, "simple");
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
if (query.getGroupResponse().getValues().size() > 0) {
//take the rows grouped by sourceFileMd5
SolrDocumentList result = query.getGroupResponse().getValues().get(0).getValues().get(0).getResult();
openFileMd5VersionIdMap = result.stream().collect(Collectors.toMap(doc -> (String) doc.get("sourceFileMd5"), Function.identity()));
}
} catch (Exception e) {
log.error("solr query failed, coreName:{}, queryStr:{}", coreName, queryStr, e);
}
log.info("batchQueryVersionIdFromSourceFileBaseBySourceMd5 cost:{}s", (System.currentTimeMillis() - startTime) / 1000);
return openFileMd5VersionIdMap;
}
/**
* Simple query returning at most one document
*
* @param searchContent query string
* @param returnFields fields to return
* @return
*/
public SolrDocument queryOne(String coreName, String searchContent, String returnFields) {
SolrDocument result = null;
try {
HttpSolrClient client = getClient(coreName);
Map<String, String> map = new HashMap<String, String>();
map.put(CommonParams.Q, searchContent);
map.put(CommonParams.FL, returnFields);
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, "1");
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
SolrDocumentList resp = query.getResults();
if (CollectionUtils.isNotEmpty(resp)) {
return resp.get(0);
}
} catch (SolrServerException | IOException e) {
log.error("solr query failed, coreName:{}, queryStr:{}", coreName, searchContent, e);
}
return result;
}
/**
* Query versionTree
*
* @param searchContent query string
* @return
*/
public VersionTree queryVersionTree(String searchContent) {
String returnFields = "proId,proName,versionName,downUrl,licenseType,dirTree";
VersionTree results = null;
try {
HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
Map<String, String> map = new HashMap<String, String>();
map.put(CommonParams.Q, searchContent);
map.put(CommonParams.FL, returnFields);
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, "1");
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
SolrDocumentList response = query.getResults();
if (!response.isEmpty()) {
//convert to the entity
results = BeanUtil.domToVersionTree(response.get(0));
}
} catch (SolrServerException | IOException e) {
log.error("solr query failed, queryStr:{}", searchContent, e);
}
return results;
}
/**
* Query versionTree by version id
*
* @param versionId the version id
* @return
*/
public VersionTree queryVersionTreeByVersionId(String versionId) {
String returnFields = "proId,proName,versionName,downUrl,licenseType,dirTree";
String queryStr = "versionId:" + versionId;
VersionTree results = null;
try {
HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
Map<String, String> map = new HashMap<String, String>();
map.put(CommonParams.Q, queryStr);
map.put(CommonParams.FL, returnFields);
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, "1");
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
SolrDocumentList response = query.getResults();
//convert to the entity
if (!response.isEmpty()) {
results = BeanUtil.domToVersionTree(response.get(0));
} else {
log.error("no VersionTree found for versionId:{}", versionId);
}
} catch (SolrServerException | IOException e) {
log.error("solr query failed, queryStr:{}", queryStr, e);
}
return results;
}
/**
* Query the details of a version
*
* @param versionId versionId
* @return
*/
public VersionTree queryVersionInfoByVersionId(Object versionId) {
String returnFields = "proId,proName,versionName,downUrl,licenseType";
VersionTree result = new VersionTree();
try {
HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
Map<String, String> map = new HashMap<String, String>();
map.put(CommonParams.Q, "versionId:" + versionId);
map.put(CommonParams.FL, returnFields);
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, "1");
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
SolrDocumentList response = query.getResults();
//convert to the entity
if (CollectionUtils.isNotEmpty(response)) {
cn.hutool.core.bean.BeanUtil.copyProperties(response.get(0), result);
result.setLicenseType(response.get(0).get("licenseType") == null ? "" : response.get(0).get("licenseType").toString());
} else {
log.error("no version info found for versionId:{}", versionId);
}
} catch (SolrServerException | IOException e) {
log.error("solr query failed, queryStr:{}", versionId, e);
}
return result;
}
/**
* Batch-query the details of several versions
*
* @param versionIds versionIds
* @return
*/
public List<VersionTree> queryBatchVersionInfoByVersionIds(Collection<String> versionIds) {
List<VersionTree> results = new ArrayList<>();
if (CollectionUtils.isEmpty(versionIds)) {
return results;
}
//de-duplicate first
versionIds = new HashSet<>(versionIds);
String queryStr = "versionId:(" + StringUtils.join(versionIds, " OR ") + ")";
String returnFields = "versionId,proId,proName,versionName,downUrl,licenseType";
try {
HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
Map<String, String> map = new HashMap<String, String>();
map.put(CommonParams.Q, queryStr);
map.put(CommonParams.FL, returnFields);
map.put(CommonParams.START, "0");
map.put(CommonParams.ROWS, String.valueOf(versionIds.size()));
SolrParams params = new MapSolrParams(map);
QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
SolrDocumentList response = query.getResults();
//convert to entities
if (!response.isEmpty()) {
for (int i = 0; i < response.size(); i++) {
VersionTree versionTree = new VersionTree();
try {
cn.hutool.core.bean.BeanUtil.copyProperties(response.get(i), versionTree);
versionTree.setLicenseType(response.get(i).get("licenseType") == null ? "" : response.get(i).get("licenseType").toString());
results.add(versionTree);
} catch (Exception e) {
log.error("failed to convert version info, versionId:{}", response.get(i).get("versionId"), e);
}
}
}
} catch (SolrServerException | IOException e) {
log.error("solr query failed, queryStr:{}", queryStr, e);
}
return results;
}
}

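A usage sketch for the MD5 batch lookup above (the core name and md5 value are assumptions; real core names come from FixedValue.SUFFIX_SOLR_VERSION):

import org.apache.solr.common.SolrDocument;
import java.util.Map;
import java.util.Set;

public class SolrUtilsDemo {
    public static void demo(SolrUtils solrUtils) {
        Set<String> md5s = Set.of("d41d8cd98f00b204e9800998ecf8427e"); // hypothetical file md5
        Map<String, SolrDocument> hits =
                solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5("JAVA_SourceFileBase", md5s);
        hits.forEach((md5, doc) ->
                System.out.println(md5 + " -> versionId=" + doc.get("versionId") + ", fullPath=" + doc.get("fullPath")));
    }
}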
@ -0,0 +1,53 @@
package com.keyware.composeanalysis.util;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;
@Component
public class SpringContextUtils implements ApplicationContextAware {
/**
* the application context instance
*/
private static ApplicationContext applicationContext;
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
SpringContextUtils.applicationContext = applicationContext;
}
/**
* Get the applicationContext
*/
public static ApplicationContext getApplicationContext() {
//guard against use before injection
if (applicationContext == null) {
throw new IllegalStateException("applicationContext has not been injected; make sure SpringContextUtils is registered as a Spring bean.");
}
return applicationContext;
}
/**
* Get a bean by name
*/
public static Object getBean(String name) {
return getApplicationContext().getBean(name);
}
/**
* Get a bean by type
*/
public static <T> T getBean(Class<T> clazz) {
return getApplicationContext().getBean(clazz);
}
/**
* Get a bean by name and type
*/
public static <T> T getBean(String name, Class<T> clazz) {
return getApplicationContext().getBean(name, clazz);
}
}

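Usage sketch: fetching a bean from code that is not Spring-managed, the same way PorjectAnalysisTask resolves its TaskExecutor (assumes the context has started and the pool from TaskExecutePool is registered):

import org.springframework.core.task.TaskExecutor;

public class ContextLookupDemo {
    public static void main(String[] args) {
        TaskExecutor executor = SpringContextUtils.getBean(TaskExecutor.class);
        executor.execute(() -> System.out.println("ran on the shared task pool"));
    }
}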
@ -0,0 +1,18 @@
server:
port: 8001
spring:
application:
name: compose-analysis-service
cloud:
nacos:
discovery:
server-addr: 172.16.36.100:8848
namespace: 7f9bb282-8ee3-4948-8182-24b7dcadcd5a
config:
server-addr: 172.16.36.100:8848
namespace: 7f9bb282-8ee3-4948-8182-24b7dcadcd5a
group: dev_group
file-extension: yaml
config:
import: nacos:compose-analysis-dev.yaml

@ -0,0 +1,215 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Per-level asynchronous file logging configuration -->
<!-- Levels from high to low: OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE, ALL -->
<!-- Output rule: a message is written when its level is at or above the ROOT level -->
<!-- Each appender below uses a filter so that a file for one level does not also receive lower-level messages -->
<!-- scan: when true, the configuration file is reloaded if it changes; defaults to true -->
<!-- scanPeriod: interval for checking the config file for changes; defaults to milliseconds if no unit is given; effective when scan is true; defaults to 1 minute -->
<!-- debug: when true, logback prints its internal status messages; defaults to false -->
<configuration scan="true" scanPeriod="60 seconds" debug="false">
<!-- Silence logback's own status output -->
<statusListener class="ch.qos.logback.core.status.NopStatusListener" />
<!-- Include Spring Boot's default logback configuration -->
<include resource="org/springframework/boot/logging/logback/defaults.xml"/>
<springProperty scope="context" name="springAppName" source="spring.application.name"/>
<!-- log path -->
<property name="logPath" value="./logs/"/>
<!-- application name used in log file names -->
<property name="appName" value="${springAppName}"/>
<!-- log level: DEBUG INFO WARN ERROR -->
<property name="logLevel" value="INFO"></property>
<!-- maximum retention: 60 days -->
<property name="maxHistory" value="60"/>
<!-- depth of the async buffer queue; affects performance; default is 256 -->
<property name="queueSize" value="512"></property>
<!-- logger pattern; pick whichever layout suits -->
<property name="logPattern" value="[ %-5level] [%date{yyyy-MM-dd HH:mm:ss.SSS}] %logger{36} [%line] [%thread]- %msg%n"></property>
<!-- %d{yyyy-MM-dd HH:mm:ss.SSS} [%-5level] %logger - %msg%n -->
<!-- %d{yyyy-MM-dd HH:mm:ss} %-4relative [%thread] %-5level %logger{35} - %msg %n -->
<!-- [ %-5level] [%date{yyyy-MM-dd HH:mm:ss.SSS}] %logger{96} [%line] [%thread]- %msg%n -->
<!-- dynamic log level via JMX -->
<jmxConfigurator/>
<!-- console appender -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<!-- development appender: only the minimum level is set; the console receives messages at or above it -->
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>debug</level>
</filter>
<encoder>
<charset>UTF-8</charset>
<!-- 控制台输出使用默认的输出模版(可以彩色打印)-->
<pattern>${CONSOLE_LOG_PATTERN}</pattern>
</encoder>
</appender>
<!-- DEBUG file appender -->
<appender name="FILE_DEBUG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${logPath}/debug/${appName}_debug.log</file>
<!-- rolling policy: roll by date and by size -->
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- Path of the archived log files. The active file is set by the file element and may live in a
different directory from the archives. %d{yyyy-MM-dd} sets the date format, %i the index. -->
<fileNamePattern>${logPath}/debug/${appName}_debug-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
<maxFileSize>128MB</maxFileSize>
<maxHistory>${maxHistory}</maxHistory>
<totalSizeCap>10GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>${logPattern}</pattern>
<charset>utf-8</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>DEBUG</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
</appender>
<!-- INFO file appender -->
<appender name="FILE_INFO" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${logPath}/info/${appName}_info.log</file>
<!-- rolling policy: roll by date and by size -->
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- Path of the archived log files. The active file is set by the file element and may live in a
different directory from the archives. %d{yyyy-MM-dd} sets the date format, %i the index. -->
<fileNamePattern>${logPath}/info/${appName}_info-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
<maxFileSize>128MB</maxFileSize>
<maxHistory>${maxHistory}</maxHistory>
<totalSizeCap>10GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>${logPattern}</pattern>
<charset>utf-8</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>INFO</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
</appender>
<!-- WARN file appender -->
<appender name="FILE_WARN" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${logPath}/warn/${appName}_warn.log</file>
<!-- rolling policy: roll by date and by size -->
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- Path of the archived log files. The active file is set by the file element and may live in a
different directory from the archives. %d{yyyy-MM-dd} sets the date format, %i the index. -->
<fileNamePattern>${logPath}/warn/${appName}_warn-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
<maxFileSize>128MB</maxFileSize>
<maxHistory>${maxHistory}</maxHistory>
<totalSizeCap>10GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>${logPattern}</pattern>
<charset>utf-8</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>WARN</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
</appender>
<!-- ERROR file appender -->
<appender name="FILE_ERROR" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${logPath}/error/${appName}_error.log</file>
<!-- rolling policy: roll by date and by size -->
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- Path of the archived log files. The active file is set by the file element and may live in a
different directory from the archives. %d{yyyy-MM-dd} sets the date format, %i the index. -->
<fileNamePattern>${logPath}/error/${appName}_error-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
<maxFileSize>128MB</maxFileSize>
<maxHistory>${maxHistory}</maxHistory>
<totalSizeCap>10GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>${logPattern}</pattern>
<charset>utf-8</charset>
</encoder>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>ERROR</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
</appender>
<!-- ASYNC_LOG_DEBUG -->
<appender name="ASYNC_LOG_DEBUG" class="ch.qos.logback.classic.AsyncAppender">
<!-- do not drop messages; by default TRACE/DEBUG/INFO are discarded once the queue is 80% full -->
<discardingThreshold>0</discardingThreshold>
<!-- queue depth; affects performance; default is 256 -->
<queueSize>${queueSize}</queueSize>
<!-- with neverBlock, logback uses the non-blocking offer instead of put so business threads are not blocked when the queue is full -->
<neverBlock>true</neverBlock>
<appender-ref ref="FILE_DEBUG"/>
</appender>
<!-- ASYNC_LOG_INFO -->
<appender name="ASYNC_LOG_INFO" class="ch.qos.logback.classic.AsyncAppender">
<!-- do not drop messages; by default TRACE/DEBUG/INFO are discarded once the queue is 80% full -->
<discardingThreshold>0</discardingThreshold>
<!-- queue depth; affects performance; default is 256 -->
<queueSize>${queueSize}</queueSize>
<!-- with neverBlock, logback uses the non-blocking offer instead of put so business threads are not blocked when the queue is full -->
<neverBlock>true</neverBlock>
<appender-ref ref="FILE_INFO"/>
</appender>
<!-- ASYNC_LOG_WARN -->
<appender name="ASYNC_LOG_WARN" class="ch.qos.logback.classic.AsyncAppender">
<!-- do not drop messages; by default TRACE/DEBUG/INFO are discarded once the queue is 80% full -->
<discardingThreshold>0</discardingThreshold>
<!-- queue depth; affects performance; default is 256 -->
<queueSize>${queueSize}</queueSize>
<!-- with neverBlock, logback uses the non-blocking offer instead of put so business threads are not blocked when the queue is full -->
<neverBlock>true</neverBlock>
<appender-ref ref="FILE_WARN"/>
</appender>
<!-- ASYNC_LOG_ERROR -->
<appender name="ASYNC_LOG_ERROR" class="ch.qos.logback.classic.AsyncAppender">
<!-- do not drop messages; by default TRACE/DEBUG/INFO are discarded once the queue is 80% full -->
<discardingThreshold>0</discardingThreshold>
<!-- queue depth; affects performance; default is 256 -->
<queueSize>${queueSize}</queueSize>
<!-- with neverBlock, logback uses the non-blocking offer instead of put so business threads are not blocked when the queue is full -->
<neverBlock>true</neverBlock>
<appender-ref ref="FILE_ERROR"/>
</appender>
<!-- root logger: references the appenders defined above -->
<root level="INFO">
<appender-ref ref="CONSOLE" />
<appender-ref ref="ASYNC_LOG_DEBUG"/>
<appender-ref ref="ASYNC_LOG_INFO"/>
<appender-ref ref="ASYNC_LOG_WARN"/>
<appender-ref ref="ASYNC_LOG_ERROR"/>
</root>
</configuration>

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.keyware.composeanalysis.mapper.AnalyzeTaskMapper">
<!-- Generic query result map -->
<resultMap id="BaseResultMap" type="com.keyware.composeanalysis.entity.AnalysisTask">
<id column="id" property="id" />
<result column="file_name" property="fileName" />
<result column="version" property="version" />
<result column="open_rate_threshold" property="openRateThreshold" />
<result column="open_type" property="openType" />
<result column="md5" property="md5" />
<result column="analysis_status" property="analysisStatus" />
<result column="analysis_start_time" property="analysisStartTime" />
<result column="analysis_end_time" property="analysisEndTime" />
<result column="compose_flag" property="composeFlag" />
<result column="assembly_flag" property="assemblyFlag" />
<result column="hold_flag" property="holdFlag" />
<result column="licence_flag" property="licenceFlag" />
<result column="decompression_flag" property="decompressionFlag" />
<result column="create_time" property="createTime" />
<result column="create_user_id" property="createUserId" />
</resultMap>
</mapper>