forked from liuzongren/compose-analysis
commit
0aa44dcca4
@ -0,0 +1,33 @@ |
||||
HELP.md |
||||
target/ |
||||
!.mvn/wrapper/maven-wrapper.jar |
||||
!**/src/main/**/target/ |
||||
!**/src/test/**/target/ |
||||
|
||||
### STS ### |
||||
.apt_generated |
||||
.classpath |
||||
.factorypath |
||||
.project |
||||
.settings |
||||
.springBeans |
||||
.sts4-cache |
||||
|
||||
### IntelliJ IDEA ### |
||||
.idea |
||||
*.iws |
||||
*.iml |
||||
*.ipr |
||||
|
||||
### NetBeans ### |
||||
/nbproject/private/ |
||||
/nbbuild/ |
||||
/dist/ |
||||
/nbdist/ |
||||
/.nb-gradle/ |
||||
build/ |
||||
!**/src/main/**/build/ |
||||
!**/src/test/**/build/ |
||||
|
||||
### VS Code ### |
||||
.vscode/ |
@ -0,0 +1,148 @@ |
||||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>com.keyware</groupId>
        <artifactId>keyware-cloud</artifactId>
        <version>1.0.0</version>
    </parent>

    <groupId>com.keyware</groupId>
    <artifactId>compose-analysis</artifactId>
    <version>1.0.0</version>
    <name>compose-analysis</name>
    <description>compose-analysis</description>

    <properties>
        <java.version>17</java.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- Nacos service registration / discovery -->
        <dependency>
            <groupId>com.alibaba.cloud</groupId>
            <artifactId>spring-cloud-starter-alibaba-nacos-discovery</artifactId>
        </dependency>

        <!-- Nacos configuration center -->
        <dependency>
            <groupId>com.alibaba.cloud</groupId>
            <artifactId>spring-cloud-starter-alibaba-nacos-config</artifactId>
        </dependency>

        <!-- MongoDB -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-mongodb</artifactId>
        </dependency>

        <!-- Redis -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
        </dependency>

        <!-- Redisson (distributed locks etc.) -->
        <dependency>
            <groupId>org.redisson</groupId>
            <artifactId>redisson-spring-boot-starter</artifactId>
            <version>3.13.6</version>
        </dependency>

        <!-- Solr client; zookeeper excluded to avoid a version clash with the one
             pulled in transitively elsewhere (TODO confirm original motivation) -->
        <dependency>
            <groupId>org.apache.solr</groupId>
            <artifactId>solr-solrj</artifactId>
            <version>7.6.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Function parser -->
        <dependency>
            <groupId>com.keyware</groupId>
            <artifactId>keyswan-analysis</artifactId>
            <version>releases-1.1.5</version>
        </dependency>

        <dependency>
            <groupId>com.keyware</groupId>
            <artifactId>keyswan-function</artifactId>
            <version>release-1.1.2</version>
        </dependency>

        <!-- Shared common module -->
        <dependency>
            <groupId>com.keyware</groupId>
            <artifactId>keyware-common</artifactId>
            <version>1.0.0</version>
        </dependency>

        <!-- API interfaces exposed by this service -->
        <dependency>
            <groupId>com.keyware</groupId>
            <artifactId>compose-analysis-api</artifactId>
            <version>1.0.0</version>
        </dependency>
    </dependencies>

    <repositories>
        <!-- NOTE(review): plain-HTTP repositories are blocked by Maven >= 3.8.1 by
             default and are susceptible to MITM; consider switching to HTTPS. -->
        <repository>
            <id>keyware-repos</id>
            <name>KeyWare Repository</name>
            <url>http://218.30.67.85:19201/nexus/content/groups/public/</url>
        </repository>
        <repository>
            <id>keyware-repos-2</id>
            <name>KeyWare Repository-2</name>
            <url>http://218.30.67.85:19201/nexus/content/repositories/releases/</url>
        </repository>
    </repositories>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>3.2.7</version>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <!-- Was hard-coded to 16, contradicting java.version=17; Spring
                         Boot 3.x requires Java 17+, so compile for the declared version. -->
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
@ -0,0 +1,21 @@ |
||||
package com.keyware.composeanalysis; |
||||
|
||||
import org.mybatis.spring.annotation.MapperScan; |
||||
import org.springframework.boot.SpringApplication; |
||||
import org.springframework.boot.autoconfigure.SpringBootApplication; |
||||
import org.springframework.cloud.client.discovery.EnableDiscoveryClient; |
||||
import org.springframework.cloud.context.config.annotation.RefreshScope; |
||||
import org.springframework.scheduling.annotation.EnableAsync; |
||||
|
||||
@MapperScan("com.keyware.composeanalysis.mapper") |
||||
@SpringBootApplication |
||||
@EnableDiscoveryClient |
||||
@RefreshScope |
||||
@EnableAsync |
||||
public class ComposeAnalyzeApplication { |
||||
|
||||
public static void main(String[] args) { |
||||
SpringApplication.run(ComposeAnalyzeApplication.class, args); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,20 @@ |
||||
package com.keyware.composeanalysis.config;

import lombok.Data;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

/**
 * @author liuzongren
 * @date 2024/7/25
 * @description Analysis configuration holder (getters/setters generated by Lombok {@code @Data}).
 */
@Configuration
@Data
public class AnalysisConfig {

    // Directory where uploaded source archives are stored and unpacked,
    // injected from the "codeResourcePath" property (fails fast if missing).
    @Value("${codeResourcePath}")
    private String codeResourcePath;

}
@ -0,0 +1,94 @@ |
||||
package com.keyware.composeanalysis.config; |
||||
|
||||
import org.redisson.Redisson; |
||||
import org.redisson.api.RedissonClient; |
||||
import org.redisson.codec.JsonJacksonCodec; |
||||
import org.redisson.config.Config; |
||||
import org.springframework.beans.factory.annotation.Value; |
||||
import org.springframework.context.annotation.Bean; |
||||
import org.springframework.context.annotation.Configuration; |
||||
import org.springframework.data.redis.connection.RedisConnectionFactory; |
||||
import org.springframework.data.redis.connection.RedisStandaloneConfiguration; |
||||
import org.springframework.data.redis.connection.lettuce.LettuceConnectionFactory; |
||||
import org.springframework.data.redis.core.RedisTemplate; |
||||
import org.springframework.data.redis.serializer.Jackson2JsonRedisSerializer; |
||||
import org.springframework.data.redis.serializer.StringRedisSerializer; |
||||
|
||||
@Configuration |
||||
public class RedisConfig { |
||||
|
||||
@Value("${spring.data.redis.host}") |
||||
private String redisHost; |
||||
|
||||
@Value("${spring.data.redis.port}") |
||||
private int redisPort; |
||||
|
||||
//配置文件中的密码 到这里已经被转义过了
|
||||
@Value("${spring.data.redis.password}") |
||||
private String redisPassword; |
||||
|
||||
@Bean(name = "redisTemplate") |
||||
public RedisTemplate<String, Object> getRedisTemplate(RedisConnectionFactory factory) { |
||||
RedisTemplate<String, Object> template = new RedisTemplate<String, Object>(); |
||||
template.setConnectionFactory(factory); |
||||
//配置序列化方式
|
||||
Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class); |
||||
|
||||
StringRedisSerializer stringRedisSerializer = new StringRedisSerializer(); |
||||
//key 采用String的序列化方式
|
||||
template.setKeySerializer(stringRedisSerializer); |
||||
//hash
|
||||
template.setHashKeySerializer(jackson2JsonRedisSerializer); |
||||
//value
|
||||
template.setValueSerializer(jackson2JsonRedisSerializer); |
||||
template.afterPropertiesSet(); |
||||
return template; |
||||
} |
||||
|
||||
|
||||
@Bean |
||||
public RedissonClient getRedisson() { |
||||
Config config = new Config(); |
||||
config.useSingleServer(). |
||||
setAddress("redis://" + redisHost + ":" + redisPort). |
||||
setPassword(redisPassword); |
||||
config.setCodec(new JsonJacksonCodec()); |
||||
return Redisson.create(config); |
||||
} |
||||
|
||||
|
||||
@Bean(name = "oneDBRedisTemplateClient") |
||||
public RedisTemplate<String, Object> redisTemplate() { |
||||
//为了开发方便,一般直接使用<String,Object>
|
||||
RedisTemplate<String, Object> template = new RedisTemplate<>(); |
||||
|
||||
template.setConnectionFactory(redisConnection(1)); |
||||
//配置序列化方式
|
||||
Jackson2JsonRedisSerializer jackson2JsonRedisSerializer = new Jackson2JsonRedisSerializer(Object.class); |
||||
|
||||
StringRedisSerializer stringRedisSerializer = new StringRedisSerializer(); |
||||
//key 采用String的序列化方式
|
||||
template.setKeySerializer(stringRedisSerializer); |
||||
//hash
|
||||
template.setHashKeySerializer(jackson2JsonRedisSerializer); |
||||
//value
|
||||
template.setValueSerializer(jackson2JsonRedisSerializer); |
||||
template.afterPropertiesSet(); |
||||
return template; |
||||
} |
||||
|
||||
private LettuceConnectionFactory redisConnection(int db) { |
||||
RedisStandaloneConfiguration server = new RedisStandaloneConfiguration(); |
||||
server.setHostName(redisHost); |
||||
server.setDatabase(db); |
||||
server.setPort(redisPort); |
||||
server.setPassword(redisPassword); |
||||
LettuceConnectionFactory factory = new LettuceConnectionFactory(server); |
||||
factory.afterPropertiesSet(); |
||||
return factory; |
||||
} |
||||
|
||||
|
||||
|
||||
|
||||
} |
@ -0,0 +1,31 @@ |
||||
package com.keyware.composeanalysis.config;

import org.redisson.Redisson;
import org.redisson.api.RBucket;
import org.redisson.api.RedissonClient;
import org.redisson.codec.JsonJacksonCodec;
import org.redisson.config.Config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * @author liuzongren
 * @date 2024/7/30
 *
 * NOTE(review): this configuration class declares Redis connection properties
 * but defines no beans — the imports and fields are currently unused. The
 * Redisson client is actually built in RedisConfig; presumably this class is
 * either a leftover or a placeholder — confirm whether it can be removed.
 */
@Configuration
public class RedissionConfig {

    // Redis host, injected but not yet used by any bean method.
    @Value("${spring.data.redis.host}")
    private String redisHost;

    // Redis port, injected but not yet used by any bean method.
    @Value("${spring.data.redis.port}")
    private int redisPort;

    // Redis password, injected but not yet used by any bean method.
    @Value("${spring.data.redis.password}")
    private String redisPassword;




}
@ -0,0 +1,25 @@ |
||||
package com.keyware.composeanalysis.config; |
||||
|
||||
import org.springframework.beans.factory.annotation.Value; |
||||
import org.springframework.boot.context.properties.ConfigurationProperties; |
||||
import org.springframework.stereotype.Component; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/24 |
||||
* description solr的配置中心 |
||||
*/ |
||||
@Component |
||||
@ConfigurationProperties(prefix = "solr") |
||||
public class SolrConfig { |
||||
|
||||
/** |
||||
* 每次查询返回的最大行数 |
||||
*/ |
||||
@Value("${solr.row:5}") |
||||
private String ROWS; |
||||
|
||||
@Value("${solr.solrUrl}") |
||||
private String solrUrl; |
||||
|
||||
} |
@ -0,0 +1,60 @@ |
||||
package com.keyware.composeanalysis.config.thread; |
||||
|
||||
import org.springframework.context.annotation.Bean; |
||||
import org.springframework.context.annotation.Configuration; |
||||
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; |
||||
|
||||
import java.util.concurrent.Executor; |
||||
import java.util.concurrent.ThreadPoolExecutor; |
||||
|
||||
/** |
||||
* 创建线程池配置类 |
||||
*/ |
||||
@Configuration |
||||
public class TaskExecutePool { |
||||
/** |
||||
* 核心线程数 |
||||
*/ |
||||
private int coreThreadsSize = 10; |
||||
|
||||
/** |
||||
* 最大线程数 |
||||
*/ |
||||
private int maxThreadsSize = 50; |
||||
|
||||
/** |
||||
* 存活时间 |
||||
*/ |
||||
private int keepAliveSeconds = 60; |
||||
|
||||
/** |
||||
* 队列容量 |
||||
*/ |
||||
private int queueCapacity = 10000; |
||||
|
||||
|
||||
@Bean |
||||
public Executor taskExecutor() { |
||||
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); |
||||
//核心线程池大小
|
||||
executor.setCorePoolSize(coreThreadsSize); |
||||
//最大线程数
|
||||
executor.setMaxPoolSize(maxThreadsSize); |
||||
//队列容量
|
||||
executor.setQueueCapacity(queueCapacity); |
||||
//活跃时间
|
||||
executor.setKeepAliveSeconds(keepAliveSeconds); |
||||
//线程名字前缀
|
||||
executor.setThreadNamePrefix("ComposeAnalysisExecutePool-"); |
||||
|
||||
// setRejectedExecutionHandler:当pool已经达到max size的时候,如何处理新任务
|
||||
// CallerRunsPolicy:不在新线程中执行任务,而是由调用者所在的线程来执行
|
||||
executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy()); |
||||
// 等待所有任务结束后再关闭线程池
|
||||
executor.setWaitForTasksToCompleteOnShutdown(true); |
||||
|
||||
executor.initialize(); |
||||
|
||||
return executor; |
||||
} |
||||
} |
@ -0,0 +1,340 @@ |
||||
package com.keyware.composeanalysis.constant;


import java.util.*;


/**
 * Lookup tables for language detection and analysis routing.
 *
 * <p>Maps file suffixes to language names, Solr core names and binary types,
 * plus license-obligation texts and a few global counters.
 *
 * <p>NOTE(review): the collections are public, static and mutable; other code
 * could modify them at runtime — confirm before making them unmodifiable.
 */
public class FixedValue {

    // Archive / binary container suffixes that are treated as compressed formats.
    public final static Set<String> COMPRESSED_FORMAT = new HashSet<String>();

    static {
        COMPRESSED_FORMAT.add(".exe");
        COMPRESSED_FORMAT.add(".msi");
        COMPRESSED_FORMAT.add(".zip");
        COMPRESSED_FORMAT.add(".rar");
        COMPRESSED_FORMAT.add(".tar");
        COMPRESSED_FORMAT.add(".jar");
        COMPRESSED_FORMAT.add(".war");
        COMPRESSED_FORMAT.add(".tar.gz");
        COMPRESSED_FORMAT.add(".gz");
        COMPRESSED_FORMAT.add(".tar.bz2");
        COMPRESSED_FORMAT.add(".bz2");
        COMPRESSED_FORMAT.add(".tar.z");
        COMPRESSED_FORMAT.add(".z");
        COMPRESSED_FORMAT.add(".tgz");
        COMPRESSED_FORMAT.add(".7z");
        COMPRESSED_FORMAT.add(".xz");
    }

    // License name -> obligation text shown in detection results.
    // (Values are user-facing Chinese text; name "LINCEN_RESULT" is a typo for
    // "LICENSE_RESULT" but is public and may be referenced elsewhere.)
    public final static Map<String, String> LINCEN_RESULT = new HashMap<String, String>();

    static {
        LINCEN_RESULT.put("BSD", "1. 如果再发布的产品中包含源代码,则在源代码中必须带有原来代码中的BSD协议。\n" +
                "2. 如果再发布的只是二进制类库/软件,则需要在类库/软件的文档和版权声明中包含原来代码中的BSD协议。\n" +
                "3. 不可以用开源代码的作者/机构名字和原来产品的名字做市场推广。");
        LINCEN_RESULT.put("Apache License","1. 需要给代码的用户一份Apache Licence\n" +
                "2. 如果你修改了代码,需要再被修改的文件中说明。\n" +
                "3. 在延伸的代码中(修改和有源代码衍生的代码中)需要带有原来代码中的协议,商标,专利声明和其他原来作者规定需要包含的说明。\n" +
                "4. 如果再发布的产品中包含一个Notice文件,则在Notice文件中需要带有Apache Licence。你可以在Notice中增加自己的许可,但不可以表现为对Apache Licence构成更改。");
        LINCEN_RESULT.put("GNU General Public License","不允许修改后和衍生的代码做为闭源的商业软件发布和销售");
        LINCEN_RESULT.put("GNU Lesser General Public License","可以被商业软件作为类库引用并发布和销售");
        LINCEN_RESULT.put("MIT","必须在你的发行版里包含原许可协议的声明");
    }


    // File suffix (with leading dot) -> canonical language name (mixed case).
    public final static Map<String, String> SUFFIX_LANG = new HashMap<String, String>();

    static {
        SUFFIX_LANG.put(".java", "Java");
        SUFFIX_LANG.put(".c", "C");
        SUFFIX_LANG.put(".h", "C");
        SUFFIX_LANG.put(".cpp", "Cpp");//C++
        //add by 2022/07/13
        SUFFIX_LANG.put(".hpp", "Cpp");//C++
        SUFFIX_LANG.put(".cs", "Cs");//C#
        SUFFIX_LANG.put(".m", "OC");//Objective-C
        //add by 2022/07/13
        SUFFIX_LANG.put(".mm", "OC");//Objective-C
        SUFFIX_LANG.put(".py", "Python");
        SUFFIX_LANG.put(".go", "Golang");
        SUFFIX_LANG.put(".pl", "Perl");
        SUFFIX_LANG.put(".rb", "Ruby");
        SUFFIX_LANG.put(".php", "PHP");
        SUFFIX_LANG.put(".sql", "Plsql");//PL/SQL
        SUFFIX_LANG.put(".abap", "Abap");
        SUFFIX_LANG.put(".lua", "Lua");
        SUFFIX_LANG.put(".erl", "Erlang");
        SUFFIX_LANG.put(".swift", "Swift");
        SUFFIX_LANG.put(".groovy", "Groovy");
        SUFFIX_LANG.put(".frm", "VB");//VB.net
        SUFFIX_LANG.put(".bas", "VB");
        SUFFIX_LANG.put(".cls", "VB");
        SUFFIX_LANG.put(".ctl", "VB");
        SUFFIX_LANG.put(".vb", "VB");
        SUFFIX_LANG.put(".vbs", "VB");
        SUFFIX_LANG.put(".pp", "Puppet");
        SUFFIX_LANG.put(".clj", "Clojure");
        SUFFIX_LANG.put(".fs", "F");//F#
        SUFFIX_LANG.put(".fsx", "F");//F#
        SUFFIX_LANG.put(".fsscript", "F");//F#
        SUFFIX_LANG.put(".hs", "Haskell");
        SUFFIX_LANG.put(".js", "Javascript");
        SUFFIX_LANG.put(".ts", "Typescript");
        SUFFIX_LANG.put(".r", "R");
        //add by 2022/07/13
        SUFFIX_LANG.put(".R", "R");
        SUFFIX_LANG.put(".sc", "Scala");
        SUFFIX_LANG.put(".scala", "Scala");
        //add by 2022/07/13
        SUFFIX_LANG.put(".pas", "Pascal");
        SUFFIX_LANG.put(".cob", "Cobol");
        SUFFIX_LANG.put(".as", "ActionScript");
        SUFFIX_LANG.put(".rs", "Rust");
        SUFFIX_LANG.put(".ino", "Arduino");
        SUFFIX_LANG.put(".asm", "Assembly");
        SUFFIX_LANG.put(".f", "Fortran");
        SUFFIX_LANG.put(".f90", "Fortran");
        SUFFIX_LANG.put(".sh", "Shell");
        SUFFIX_LANG.put(".html", "Html");
        SUFFIX_LANG.put(".htm", "Html");
        SUFFIX_LANG.put(".css", "Css");
        SUFFIX_LANG.put(".rpg", "Rpg");
        SUFFIX_LANG.put(".xml", "Xml");
        SUFFIX_LANG.put(".pli", "Pli");//PL/I
        SUFFIX_LANG.put(".p","OpenEdge");//OpenEdge
        SUFFIX_LANG.put(".abl","OpenEdge");//OpenEdge
        //SUFFIX_LANG.put(".jar", "Jar");
        //SUFFIX_LANG.put(".war", "War");
    }

    // File suffix (with leading dot) -> display language name (lowercase variant).
    public final static Map<String, String> SUFFIX_LANG1 = new HashMap<String, String>();

    static {
        SUFFIX_LANG1.put(".java", "java");
        SUFFIX_LANG1.put(".c", "c");
        SUFFIX_LANG1.put(".h", "c");
        SUFFIX_LANG1.put(".cpp", "c++");//C++
        //add by 2022/07/13
        SUFFIX_LANG1.put(".hpp", "c++");//C++
        SUFFIX_LANG1.put(".cs", "c#");//C#
        SUFFIX_LANG1.put(".m", "Objective-C");//Objective-C
        //add by 2022/07/13
        SUFFIX_LANG1.put(".mm", "Objective-C");//Objective-C
        SUFFIX_LANG1.put(".py", "python");
        SUFFIX_LANG1.put(".go", "go");
        SUFFIX_LANG1.put(".pl", "perl");
        SUFFIX_LANG1.put(".rb", "ruby");
        SUFFIX_LANG1.put(".php", "php");
        SUFFIX_LANG1.put(".sql", "plsql");//PL/SQL
        SUFFIX_LANG1.put(".abap", "abap");
        SUFFIX_LANG1.put(".lua", "lua");
        SUFFIX_LANG1.put(".erl", "erlang");
        SUFFIX_LANG1.put(".swift", "swift");
        SUFFIX_LANG1.put(".groovy", "groovy");
        SUFFIX_LANG1.put(".frm", "vb");//VB.net
        SUFFIX_LANG1.put(".bas", "vb");
        SUFFIX_LANG1.put(".cls", "vb");
        SUFFIX_LANG1.put(".ctl", "vb");
        SUFFIX_LANG1.put(".vb", "vb");
        SUFFIX_LANG1.put(".vbs", "vb");
        SUFFIX_LANG1.put(".pp", "puppet");
        SUFFIX_LANG1.put(".clj", "clojure");
        SUFFIX_LANG1.put(".fs", "f");//F#
        SUFFIX_LANG1.put(".fsx", "f");//F#
        SUFFIX_LANG1.put(".fsscript", "f");//F#
        SUFFIX_LANG1.put(".hs", "haskell");
        SUFFIX_LANG1.put(".js", "javaScript");
        SUFFIX_LANG1.put(".ts", "typeScript");
        SUFFIX_LANG1.put(".r", "r");
        //add by 2022/07/13
        SUFFIX_LANG1.put(".R", "r");
        SUFFIX_LANG1.put(".sc", "scala");
        SUFFIX_LANG1.put(".scala", "scala");
        //add by 2022/07/13
        SUFFIX_LANG1.put(".pas", "pascal");
        SUFFIX_LANG1.put(".cob", "cobol");
        SUFFIX_LANG1.put(".as", "actionScript");
        SUFFIX_LANG1.put(".rs", "rust");
        SUFFIX_LANG1.put(".ino", "arduino");
        SUFFIX_LANG1.put(".asm", "assembly");
        SUFFIX_LANG1.put(".f", "fortran");
        SUFFIX_LANG1.put(".f90", "fortran");
        SUFFIX_LANG1.put(".sh", "shell");
        SUFFIX_LANG1.put(".html", "html");
        SUFFIX_LANG1.put(".htm", "html");
        SUFFIX_LANG1.put(".css", "css");
        SUFFIX_LANG1.put(".rpg", "rpg");
        SUFFIX_LANG1.put(".xml", "xml");
        SUFFIX_LANG1.put(".pli", "pli");//PL/I
        SUFFIX_LANG1.put(".p","OpenEdge");//OpenEdge
        SUFFIX_LANG1.put(".abl","OpenEdge");//OpenEdge
        //SUFFIX_LANG.put(".jar", "Jar");
        //SUFFIX_LANG.put(".war", "War");
    }

    // File suffix (without dot) -> Solr feature-core name ("*_CutFileInfo").
    public final static Map<String, String> SUFFIX_SOLR_FILE = new HashMap<String, String>();

    static {
        SUFFIX_SOLR_FILE.put("java", "Java_CutFileInfo");
        SUFFIX_SOLR_FILE.put("c", "C_CutFileInfo");
        SUFFIX_SOLR_FILE.put("h", "C_CutFileInfo");
        SUFFIX_SOLR_FILE.put("cpp", "Cpp_CutFileInfo");//C++
        SUFFIX_SOLR_FILE.put("hpp", "Cpp_CutFileInfo");//C++
        SUFFIX_SOLR_FILE.put("cs", "Cs_CutFileInfo");//C#
        SUFFIX_SOLR_FILE.put("m", "OC_CutFileInfo");//Objective-C
        SUFFIX_SOLR_FILE.put("mm", "OC_CutFileInfo");//Objective-C
        SUFFIX_SOLR_FILE.put("py", "Python_CutFileInfo");
        SUFFIX_SOLR_FILE.put("go", "Golang_CutFileInfo");
        SUFFIX_SOLR_FILE.put("pl", "Perl_CutFileInfo");
        SUFFIX_SOLR_FILE.put("rb", "Ruby_CutFileInfo");
        SUFFIX_SOLR_FILE.put("php", "PHP_CutFileInfo");
        SUFFIX_SOLR_FILE.put("sql", "Plsql_CutFileInfo");//PL/SQL
        SUFFIX_SOLR_FILE.put("abap", "Abap_CutFileInfo");
        SUFFIX_SOLR_FILE.put("lua", "Lua_CutFileInfo");
        SUFFIX_SOLR_FILE.put("erl", "Erlang_CutFileInfo");
        SUFFIX_SOLR_FILE.put("swift", "Swift_CutFileInfo");
        SUFFIX_SOLR_FILE.put("groovy", "Groovy_CutFileInfo");
        SUFFIX_SOLR_FILE.put("frm", "VB_CutFileInfo");//VB.net
        SUFFIX_SOLR_FILE.put("bas", "VB_CutFileInfo");
        SUFFIX_SOLR_FILE.put("cls", "VB_CutFileInfo");
        SUFFIX_SOLR_FILE.put("ctl", "VB_CutFileInfo");
        SUFFIX_SOLR_FILE.put("vb", "VB_CutFileInfo");
        SUFFIX_SOLR_FILE.put("vbs", "VB_CutFileInfo");
        SUFFIX_SOLR_FILE.put("pp", "Puppet_CutFileInfo");
        SUFFIX_SOLR_FILE.put("clj", "Clojure_CutFileInfo");
        SUFFIX_SOLR_FILE.put("fs", "F_CutFileInfo");//F#
        SUFFIX_SOLR_FILE.put("fsx", "F_CutFileInfo");//F#
        SUFFIX_SOLR_FILE.put("fsscript", "F_CutFileInfo");//F#
        SUFFIX_SOLR_FILE.put("hs", "Haskell_CutFileInfo");
        SUFFIX_SOLR_FILE.put("js", "Javascript_CutFileInfo");
        SUFFIX_SOLR_FILE.put("ts", "Typescript_CutFileInfo");
        SUFFIX_SOLR_FILE.put("r", "R_CutFileInfo");
        SUFFIX_SOLR_FILE.put("R", "R_CutFileInfo");
        SUFFIX_SOLR_FILE.put("sc", "Scala_CutFileInfo");
        SUFFIX_SOLR_FILE.put("scala", "Scala_CutFileInfo");
        SUFFIX_SOLR_FILE.put("pas", "Pascal_CutFileInfo");
        SUFFIX_SOLR_FILE.put("cob", "Cobol_CutFileInfo");
        SUFFIX_SOLR_FILE.put("as", "ActionScript_CutFileInfo");
        SUFFIX_SOLR_FILE.put("rs", "Rust_CutFileInfo");
        SUFFIX_SOLR_FILE.put("ino", "Arduino_CutFileInfo");
        SUFFIX_SOLR_FILE.put("asm", "Assembly_CutFileInfo");
        SUFFIX_SOLR_FILE.put("f", "Fortran_CutFileInfo");
        SUFFIX_SOLR_FILE.put("f90", "Fortran_CutFileInfo");
        SUFFIX_SOLR_FILE.put("sh", "Shell_CutFileInfo");
        SUFFIX_SOLR_FILE.put("html", "Html_CutFileInfo");
        SUFFIX_SOLR_FILE.put("htm", "Html_CutFileInfo");
        SUFFIX_SOLR_FILE.put("css", "Css_CutFileInfo");
        SUFFIX_SOLR_FILE.put("rpg", "Rpg_CutFileInfo");
        SUFFIX_SOLR_FILE.put("xml", "Xml_CutFileInfo");
        SUFFIX_SOLR_FILE.put("pli", "Pli_CutFileInfo");//PL/I
    }

    // File suffix (without dot) -> Solr source-file-base core name ("*_SourceFileBase").
    public final static Map<String, String> SUFFIX_SOLR_VERSION = new HashMap<String, String>();

    static {
        SUFFIX_SOLR_VERSION.put("java", "Java_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("c", "C_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("h", "C_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("cpp", "Cpp_SourceFileBase");//C++
        SUFFIX_SOLR_VERSION.put("hpp", "Cpp_SourceFileBase");//C++
        SUFFIX_SOLR_VERSION.put("cs", "Cs_SourceFileBase");//C#
        SUFFIX_SOLR_VERSION.put("m", "OC_SourceFileBase");//Objective-C
        SUFFIX_SOLR_VERSION.put("mm", "OC_SourceFileBase");//Objective-C
        SUFFIX_SOLR_VERSION.put("py", "Python_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("go", "Golang_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("pl", "Perl_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("rb", "Ruby_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("php", "PHP_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("sql", "Plsql_SourceFileBase");//PL/SQL
        SUFFIX_SOLR_VERSION.put("abap", "Abap_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("lua", "Lua_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("erl", "Erlang_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("swift", "Swift_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("groovy", "Groovy_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("frm", "VB_SourceFileBase");//VB.net
        SUFFIX_SOLR_VERSION.put("bas", "VB_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("cls", "VB_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("ctl", "VB_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("vb", "VB_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("vbs", "VB_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("pp", "Puppet_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("clj", "Clojure_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("fs", "F_SourceFileBase");//F#
        SUFFIX_SOLR_VERSION.put("fsx", "F_SourceFileBase");//F#
        SUFFIX_SOLR_VERSION.put("fsscript", "F_SourceFileBase");//F#
        SUFFIX_SOLR_VERSION.put("hs", "Haskell_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("js", "Javascript_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("ts", "Typescript_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("r", "R_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("R", "R_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("sc", "Scala_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("scala", "Scala_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("pas", "Pascal_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("cob", "Cobol_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("as", "ActionScript_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("rs", "Rust_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("ino", "Arduino_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("asm", "Assembly_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("f", "Fortran_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("f90", "Fortran_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("sh", "Shell_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("html", "Html_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("htm", "Html_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("css", "Css_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("rpg", "Rpg_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("xml", "Xml_SourceFileBase");
        SUFFIX_SOLR_VERSION.put("pli", "Pli_SourceFileBase");//PL/I
        // SUFFIX_SOLR_VERSION.put(".class", "mavenBinaryVersionTree");//class binary files
        // SUFFIX_SOLR_VERSION.put(".jar", "mavenBinaryVersionTree");//class binary files
        // SUFFIX_SOLR_VERSION.put(".dll", "nugetBinaryVersionTree");//dll binary files
    }

    // Binary file suffix -> originating language.
    public final static Map<String, String> SUFFIX_BIN = new HashMap<String, String>();

    static {
        SUFFIX_BIN.put(".class", "java");
        SUFFIX_BIN.put(".dll", "c++");
    }

    // Binary file suffix -> Solr binary version-tree core.
    public final static Map<String, String> SUFFIX_BIN_SOLR = new HashMap<String, String>();

    static {
        SUFFIX_BIN_SOLR.put(".class", "mavenBinaryVersionTree");
        SUFFIX_BIN_SOLR.put(".dll", "nugetBinaryVersionTree");
    }

    // Subset of suffixes with a display-name mapping used for T-file handling
    // (purpose inferred from name only — TODO confirm).
    public final static Map<String, String> SUFFIX_TFILE = new HashMap<String, String>();

    static {
        SUFFIX_TFILE.put(".c", "C");
        SUFFIX_TFILE.put(".cc", "C++");
        SUFFIX_TFILE.put(".cpp", "C++");
        SUFFIX_TFILE.put(".cs", "C#");
    }

    // CVE severity label -> numeric rank (higher is more severe).
    public final static Map<String, Integer> CVE_LEVE = new HashMap<String, Integer>();

    static {
        CVE_LEVE.put("HIGH", 2);
        CVE_LEVE.put("MEDIUM", 1);
        CVE_LEVE.put("LOW", 0);
    }


    // Maximum number of analyses allowed to run concurrently system-wide.
    // NOTE(review): mutable global state, presumably updated elsewhere at runtime.
    public static int MAX_ANALYSIS_SIZE = 0;

    // Maximum number of analyses allowed to wait in the queue.
    public static int MAX_WAIT_SIZE = 0;

    // Number of analysis tasks currently in progress.
    public static int IN_PROGRESS_NUM = 0;


}
@ -0,0 +1,26 @@ |
||||
package com.keyware.composeanalysis.constant;


/**
 * @Author liuzongren
 * @Date 2024/7/24
 * @Description Constants shared by the "function" and "analysis" components.
 *              NOTE(review): constant interface is an anti-pattern; consider a
 *              final class with a private constructor if callers permit.
 * @return
 **/
public interface FunctionAndAnalysisAssemblyConst {


    /**
     * analysis component: line-level feature extraction mode.
     */
    String LINE_EXTRACT= "2";


    /**
     * analysis component: line-level feature extraction using a rolling
     * 6-line window.
     */
    String LINE_EXTRACT_BY_6_LINE = "1";


}
@ -0,0 +1,69 @@ |
||||
package com.keyware.composeanalysis.constant;


/**
 * @Author liuzongren
 * @Description MongoDB database/collection/field name constants.
 * @Date 2024/7/23
 * @Param
 * @return
 **/
public interface MongoDBConst {


    /**
     * Name of the MongoDB KEYSWAN database.
     */
    String DB_NAME_KEYSWAN = "KEYSWAN";


    /**
     * Prefix for each per-task MongoDB database name.
     */
    String DB_NAME_PREFIX = "keyswan_task_";


    /**
     * VERSIONBASEDATA collection: stores open-source project metadata such as MD5s.
     */
    String TABLE_NAME_VERSIONBASEDATA = "VERSIONBASEDATA";

    // Source-file base for files whose language is outside the 32 supported ones.
    String TABLE_NAME_SOURCE_FILE_BASE = "Other_SourceFileBase";

    // Version-tree collection.
    String VERSION_TREE = "versionTree";

    /**
     * file_data collection: stores per-file analysis information.
     */
    String TABLE_NAME_FILE_DATA = "file_data";

    /**
     * match_open_file collection: stores matched open-source project information.
     */
    String TABLE_NAME_MATCH_OPEN_FILE = "match_open_file";

    // line_data collection.
    String DB_TABLE_NAME_LINE_DATA = "line_data";


    // file_data collection: "isAnalyze" analysis-status field.
    String ANALYSIS_STATUS = "isAnalyze";


    // file_data collection: "isParent" directory flag.
    String IS_DIR = "isParent";


    // file_data collection: "assFlag" — whether component analysis has completed.
    String ASS_FLAG = "assFlag";

    // file_data "isSelect": 0 = unpacked, nothing extracted; 1 = file-level feature
    // extraction started; 2 = line-level extraction started; 3 = composition-analysis
    // extraction started.
    // match_open_file "isSelect": 0 = initial; 1 = already queried.
    // line_hay: 0 = line-level extraction started.
    String IS_SELECT = "isSelect";


}
@ -0,0 +1,20 @@ |
||||
package com.keyware.composeanalysis.constant;

/**
 * @author liuzongren
 * @date 2024/7/31
 * @description Redis key constants (format strings taking a task id via %s).
 */
public interface RedisConst {

    /**
     * Key prefix for the global distributed lock of an analysis task.
     */
    String TASK_LOCK_KEY_PREFIX = "ANALYSIS_TASK_LOCK_ID_%s";


    /**
     * Key prefix for the running status of an analysis task.
     */
    String TASK_RUNNING_STATUS_KEY_PREFIX = "ANALYSIS_TASK_RUNNING_STATUS_%s";
}
@ -0,0 +1,33 @@ |
||||
package com.keyware.composeanalysis.constant;


/**
 * @Author liuzongren
 * @Description Solr core-name constants.
 * @Date 2024/7/24
 * @Param
 * @return
 **/
public interface SolrDBConst {


    /**
     * Solr versionTree core: stores open-source project versions and each
     * version's file/directory tree.
     */
    String VERSION_TREE = "versionTree";


    /**
     * Suffix of the "_SourceFileInfo" cores holding open-source project source
     * file info, sharded by the first character of the source file's MD5.
     */
    String CORE_NAME_SUFFIX_SOURCE_FILE_INFO = "_SourceFileInfo";


    /**
     * SourceFileInfoTemp core: stores per-line feature information of files.
     */
    String CORE_NAME_SOURCE_FILE_INFO_TEMP = "SourceFileInfoTemp";


}
@ -0,0 +1,48 @@ |
||||
package com.keyware.composeanalysis.constant.enums; |
||||
|
||||
|
||||
/**
 * Analysis granularity levels.
 * Detection feature level codes: 0 = file, 1 = function, 2 = code block, 3 = line.
 * NOTE(review): original comment says the database still uses the old level
 * typing — confirm codes stay in sync when the schema is migrated.
 */
public enum AnalysisLevelEnum {

    FILE_LEVEL("文件级", 0),

    FUNCTION_LEVEL("函数级", 1),

    BLOCK_LEVEL("代码块级", 2),

    LINE_LEVEL("行级", 3);

    /** Human-readable level name (Chinese). */
    private final String status;

    /** Numeric level code persisted in the database. */
    private final Integer code;

    AnalysisLevelEnum(String status, Integer code) {
        this.status = status;
        this.code = code;
    }

    public String getStatus() {
        return status;
    }

    public Integer getCode() {
        return code;
    }

    /**
     * Looks up the enum constant by its numeric code.
     *
     * @param code level code (0-3)
     * @return the matching constant, or {@code null} if none matches
     */
    public static AnalysisLevelEnum getAnalysisLevelEnum(Integer code) {
        for (AnalysisLevelEnum level : AnalysisLevelEnum.values()) {
            if (level.getCode().equals(code)) {
                return level;
            }
        }
        return null;
    }

}
@ -0,0 +1,48 @@ |
||||
package com.keyware.composeanalysis.constant.enums; |
||||
|
||||
|
||||
/**
 * Analysis task status.
 * Codes: 0 not analyzed, 1 analyzing, 2 done, 3 paused, 4 waiting,
 * 5 started, 6 stopped, 7 failed.
 */
public enum AnalysisStatusEnum {

    UN_ANALYSIS("未分析", 0),

    ANALYSISING("正在分析", 1),

    ANALYSIS_DONE("分析完成", 2),

    PAUSE_ANALYSIS("暂停分析", 3),

    WAIT_ANALYSIS("等待分析", 4),

    START_ANALYSIS("开始分析", 5),

    STOP_ANALYSIS("终止分析", 6),

    FAIL_ANALYSIS("分析失败", 7);

    /** Human-readable status name (Chinese). */
    private final String status;

    /** Numeric status code persisted in the database. */
    private final Integer code;

    AnalysisStatusEnum(String status, Integer code) {
        this.status = status;
        this.code = code;
    }

    public String getStatus() {
        return status;
    }

    public Integer getCode() {
        return code;
    }

}
@ -0,0 +1,42 @@ |
||||
package com.keyware.composeanalysis.constant.enums; |
||||
|
||||
|
||||
/**
 * File analysis status.
 * NOTE(review): the legacy scheme was 0 unanalyzed / 1 file-level done /
 * 2 file-level / 3 line-level done / 4 failed / 5 paused / 6 compose done;
 * the codes below follow a different (new) scheme — confirm every writer
 * and reader of this field agrees on the new codes.
 */
public enum FileAnalysisStatusEnum {

    UN_START_ANALYSIS("未开始分析", 0),

    FILE_ANALYSIS_DONE("文件级已分析完成", 1),

    FAILED_ANALYSIS("分析失败", 2),

    PAUSE_ANALYSIS("暂停分析", 3),

    ANALYSIS_DONE("成分分析完成", 4);

    /** Human-readable status name (Chinese). */
    private final String status;

    /** Numeric status code persisted in the database. */
    private final Integer code;

    FileAnalysisStatusEnum(String status, Integer code) {
        this.status = status;
        this.code = code;
    }

    public String getStatus() {
        return status;
    }

    public Integer getCode() {
        return code;
    }

}
@ -0,0 +1,119 @@ |
||||
package com.keyware.composeanalysis.controller; |
||||
|
||||
import com.keyware.common.constant.RedisConst; |
||||
import com.keyware.composeanalysis.api.ComposeAnalysisApi; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.response.AnalysisResp; |
||||
import com.keyware.composeanalysis.service.AnalysisTaskService; |
||||
import com.keyware.composeanalysis.util.IpUtil; |
||||
import jakarta.annotation.Resource; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.redisson.api.RLock; |
||||
import org.redisson.api.RedissonClient; |
||||
import org.springframework.web.bind.annotation.RestController; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/30 |
||||
*/ |
||||
@Log4j2 |
||||
@RestController |
||||
public class ComposeAnalysisController implements ComposeAnalysisApi { |
||||
|
||||
@Resource |
||||
private AnalysisTaskService taskService; |
||||
|
||||
@Resource |
||||
private RedissonClient redissonClient; |
||||
|
||||
@Override |
||||
public AnalysisResp startComposeAnalysisTask(String taskId) { |
||||
AnalysisResp result = new AnalysisResp(); |
||||
result.setNodeIp(IpUtil.getHostIp()); |
||||
RLock lock = redissonClient.getLock(String.format(RedisConst.TASK_LOCK_KEY_PREFIX, taskId)); |
||||
try { |
||||
//执行任务前 首先获取当前任务的锁,防止多节点并发分析同一任务
|
||||
if (lock.tryLock()) { |
||||
AnalysisTask analysisTask = taskService.getById(taskId); |
||||
|
||||
//校验任务是否存在
|
||||
if (analysisTask == null) { |
||||
result.setCode(202); |
||||
result.setResponseMsg("当前任务不存在"); |
||||
return result; |
||||
} |
||||
|
||||
if (analysisTask.getAnalysisStatus().equals(AnalysisStatusEnum.ANALYSIS_DONE.getCode())) { |
||||
result.setCode(202); |
||||
result.setResponseMsg("任务已分析完成,如需重新分析,请点击重新分析"); |
||||
return result; |
||||
} |
||||
|
||||
//执行成分分析任务
|
||||
result.setCode(200); |
||||
taskService.doComposeAnalyze(analysisTask); |
||||
result.setResponseMsg("任务执行成功"); |
||||
} |
||||
} catch (Exception e) { |
||||
result.setCode(500); |
||||
result.setResponseMsg("任务执行失败"); |
||||
log.error("任务执行失败", e); |
||||
lock.unlock(); |
||||
}finally { |
||||
lock.unlock(); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
@Override |
||||
public void stopComposeAnalysisTask(String taskId) { |
||||
taskService.stopComposeAnalysisTask(taskId); |
||||
} |
||||
|
||||
@Override |
||||
public AnalysisResp restartComposeAnalysisTask(String taskId) { |
||||
AnalysisResp result = new AnalysisResp(); |
||||
result.setNodeIp(IpUtil.getHostIp()); |
||||
AnalysisTask analysisTask = taskService.getById(taskId); |
||||
|
||||
//校验任务是否存在
|
||||
if (analysisTask == null) { |
||||
result.setCode(202); |
||||
result.setResponseMsg("当前任务不存在"); |
||||
return result; |
||||
} |
||||
|
||||
Boolean isRestart = taskService.restartComposeAnalysisTask(taskId); |
||||
|
||||
if (isRestart) { |
||||
result.setCode(200); |
||||
result.setResponseMsg("任务重启成功"); |
||||
} else { |
||||
result.setCode(202); |
||||
result.setResponseMsg("任务重启失败"); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
@Override |
||||
public AnalysisResp recoveryComposeAnalysisTask(String taskId) { |
||||
AnalysisResp result = new AnalysisResp(); |
||||
result.setNodeIp(IpUtil.getHostIp()); |
||||
AnalysisTask analysisTask = taskService.getById(taskId); |
||||
|
||||
//校验任务是否存在
|
||||
if (analysisTask == null) { |
||||
result.setCode(202); |
||||
result.setResponseMsg("当前任务不存在"); |
||||
return result; |
||||
} |
||||
taskService.recoveryComposeAnalysisTask(analysisTask); |
||||
|
||||
result.setCode(200); |
||||
result.setResponseMsg("任务恢复成功"); |
||||
return result; |
||||
} |
||||
|
||||
|
||||
} |
@ -0,0 +1,157 @@ |
||||
package com.keyware.composeanalysis.entity; |
||||
|
||||
import com.baomidou.mybatisplus.annotation.TableField; |
||||
import com.baomidou.mybatisplus.annotation.TableId; |
||||
import com.baomidou.mybatisplus.annotation.TableName; |
||||
import lombok.AllArgsConstructor; |
||||
import lombok.Data; |
||||
import lombok.NoArgsConstructor; |
||||
import lombok.experimental.Accessors; |
||||
|
||||
import java.io.Serializable; |
||||
import java.util.Date; |
||||
|
||||
/**
 * <p>
 * Compose-analysis task entity.
 * </p>
 *
 * @author liuzongren
 * @since 2024-07-23
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
@Accessors(chain = true)
@TableName("analysis_task")
public class AnalysisTask implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * Logical primary key, UUID.
     */
    @TableId("id")
    private String id;

    /**
     * Name of the artifact under test (uploaded file name).
     */
    @TableField("file_name")
    private String fileName;

    /**
     * Version of the artifact under test.
     */
    @TableField("version")
    private String version;

    /**
     * Analysis granularity.
     * Detection feature level: 0 file, 1 function, 2 code block, 3 line.
     */
    @TableField("analysis_level")
    private Integer analysisLevel;

    /**
     * Open-source ratio threshold; a file whose ratio exceeds it is judged open source.
     */
    @TableField("open_rate_threshold")
    private Integer openRateThreshold;

    /**
     * Whether the artifact under test is itself open source.
     */
    @TableField("open_type")
    private Boolean openType;

    /**
     * MD5 of the artifact under test.
     */
    @TableField("md5")
    private String md5;

    /**
     * Total number of files in the artifact under test.
     */
    @TableField("file_count")
    private Integer fileCount;

    /**
     * Compose-analysis status.
     */
    @TableField("analysis_status")
    private Integer analysisStatus;

    /**
     * Analysis start time.
     */
    @TableField("analysis_start_time")
    private Date analysisStartTime;

    /**
     * Analysis end time.
     */
    @TableField("analysis_end_time")
    private Date analysisEndTime;

    /**
     * Compose analysis progress flag (0: not done, 1: running, 2: done).
     */
    @TableField("compose_flag")
    private Integer composeFlag;

    /**
     * Component analysis progress flag (0: not done, 1: running, 2: done).
     */
    @TableField("assembly_flag")
    private Integer assemblyFlag;

    /**
     * Vulnerability analysis progress flag (0: not done, 1: running, 2: done).
     */
    @TableField("hold_flag")
    private Integer holdFlag;

    /**
     * License analysis progress flag (0: not done, 1: running, 2: done).
     */
    @TableField("licence_flag")
    private Integer licenceFlag;

    /**
     * Whether decompression of the uploaded archive finished (false: not yet, true: done).
     */
    @TableField("decompression_flag")
    private Boolean decompressionFlag = false;

    /**
     * Task creation time.
     */
    @TableField("create_time")
    private Date createTime;

    /**
     * Id of the user who created the task.
     */
    @TableField("create_user_id")
    private String createUserId;

    /**
     * Total analysis duration, formatted as hours-minutes-seconds.
     * Not persisted.
     */
    @TableField(exist = false)
    private String analysisUsedTime;

    /**
     * Analysis progress, e.g. "100%".
     * Not persisted.
     */
    @TableField(exist = false)
    private String analysisProgress;

}
@ -0,0 +1,18 @@ |
||||
package com.keyware.composeanalysis.mapper; |
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import org.apache.ibatis.annotations.Mapper; |
||||
import org.apache.ibatis.annotations.Param; |
||||
|
||||
import java.io.Serializable; |
||||
import java.util.List; |
||||
|
||||
/**
 * MyBatis-Plus mapper for {@link AnalysisTask}; inherits standard CRUD
 * operations from {@link BaseMapper}.
 *
 * @author liuzongren
 * @since 2024-07-23
 */
@Mapper
public interface AnalyzeTaskMapper extends BaseMapper<AnalysisTask> {

}
@ -0,0 +1,37 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import cn.hutool.core.date.DateTime; |
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.io.Serializable; |
||||
|
||||
/**
 * Analysis progress log record, stored in the "analysis_log" Mongo collection.
 *
 * @author liuzongren
 * @version 1.0
 * @datetime 2024-07-23 18:05
 */
@Data
@Accessors(chain =true)
@Document(collection = "analysis_log")
public class AnalysisLogMongoDto implements Serializable {

    // MongoDB document id
    @Id
    private String id;

    /**
     * Log message.
     */
    private String logInfo;

    /**
     * Creation time (hutool DateTime).
     */
    private DateTime createTime;

}
@ -0,0 +1,96 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.util.ArrayList; |
||||
import java.util.List; |
||||
|
||||
|
||||
/**
 * Component (assembly) data document, mapped to the "assembly_data" Mongo collection.
 *
 * @author liuzongren
 * @date 2024/7/9
 */
@Document(collection = "assembly_data")
@Data
public class AssemblyMongoDto {

    /**
     * Unique MongoDB document identifier.
     */
    @Id
    private String id;

    /**
     * Name of the file that references the component.
     */
    private String fileName;

    /**
     * Name of the matched open-source component.
     */
    private String assemblyName;

    /**
     * Version of the matched open-source component.
     */
    private String assemblyVersion;

    /**
     * Source URL of the component.
     */
    private String url;

    /**
     * Open-source ratio/similarity indicator of the component.
     */
    private String semblance;

    /**
     * Licenses of the component.
     */
    private List<String> license;

    /**
     * Id of the analysis task this record belongs to.
     */
    private String testFileId;

    /**
     * Id of the associated file.
     */
    private String fileId;

    /**
     * Parent node number, used to build the tree structure.
     */
    private int parentNode;

    /**
     * Depth (layer) of this node.
     */
    private int layer;

    /**
     * Number of this node.
     */
    private int node;

    /**
     * Child nodes, used to build the tree structure.
     */
    private List<AssemblyMongoDto> children = new ArrayList<>();

    /**
     * Adds a child node to this node.
     *
     * @param child the child node to add
     */
    public void addChild(AssemblyMongoDto child) {
        children.add(child);
    }
}
@ -0,0 +1,106 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.io.Serializable; |
||||
import java.util.Date; |
||||
|
||||
/**
 * Per-file analysis data, mapped to the "file_data" Mongo collection.
 */
@Document(collection = "file_data")
@Data
@Accessors(chain =true)
public class FileDataMongoDto implements Serializable {

    @Id
    private String id;

    // file name
    private String name;

    // file MD5
    private String md5;

    /**
     * Scenario 1: set after a successful file-level match.
     */
    // file feature MD5
    @Deprecated
    private String cutMd5;

    // file size in bytes
    private Integer fileSize;

    // number of code lines
    private Integer codeRowNum;

    // function count; appears unused — candidate for removal after testing
    @Deprecated
    private Integer funCount;

    // parent directory id
    private String pId;

    // raw file hash; currently unused
    @Deprecated
    private String sourceMd5;

    // language
    @Deprecated
    private String lang;

    // military/civil code marker (1: military; 2: civilian)
    @Deprecated
    private String militaryType;

    // whether this entry is a directory (0: false; 1: true)
    private Boolean isDirectory;

    // file analysis status (0: unanalyzed; 1: file-level matched; 2: file-level
    // unmatched, needs line-level extraction; 3: line-level extraction done;
    // 4: failed; 5: paused; 6: compose analysis done)
    private Integer fileAnalysisStatus;

    // associated file id
    @Deprecated
    private String testFileId;

    // creation time
    private Date createTime;

    // file path
    private String fileUrl;

    // file extension
    private String suffix;

    // whether component analysis is finished (0: no, 1: yes)
    private String assFlag;

    // whether license analysis is finished (0: no, 1: yes)
    private String licenceFlag;

    // whether vulnerability analysis is finished (0: no, 1: yes)
    private String holdFlag;

    // open-source ratio of the file
    private Float openRate = 0f;

    /**
     * Scenario 1: file-level match — when the feature MD5 or source MD5 matches,
     * this is set directly to the file's total line count.
     */
    // number of open-source code lines in the file
    private Integer openLineCount = 0;

    // open-source type of the file (false: self-developed, true: open source)
    private Boolean openType = false;

    // secondary analysis-state field (whether the file is being analyzed):
    /**
     * Scenario 1: just decompressed — set to 0.
     * Scenario 2: file-level analysis finished without a result and the next
     * level of analysis is needed — set to 1.
     */
    @Deprecated
    private String isSelect;

}
@ -0,0 +1,56 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import com.keyware.keyswan.common.LineModel; |
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.util.List; |
||||
|
||||
/**
 * Line-level feature data of a file, mapped to the "line_data" Mongo collection.
 *
 * @author liuzongren
 * @date 2024/7/25
 */
@Document(collection = "line_data")
@Data
@Accessors(chain =true)
public class LineDataMongoDto {

    /**
     * Unique MongoDB document identifier.
     */
    @Id
    private String id;

    /**
     * Id of the file this record belongs to.
     */
    private String fileId;

    /**
     * Status; set to 0 when newly created.
     */
    @Deprecated
    private Integer status;

    /**
     * Selection flag; set to false when newly created.
     */
    @Deprecated
    private Boolean isSelect;

    /**
     * Line feature MD5 values of the file.
     * NOTE(review): field name has a typo ("Featue"); renaming would change
     * the persisted Mongo field name, so it is kept as-is.
     */
    private List<String> lineFeatueMd5s;

    /**
     * Code-block MD5 values of the file.
     */
    private List<LineModel> lineModels;

}
@ -0,0 +1,62 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
|
||||
|
||||
import java.io.Serializable; |
||||
import java.util.List; |
||||
|
||||
/**
 * Matched open-source file info.
 *
 * @author liuzongren
 * @version 1.0
 * @datetime 2024-07-23 18:05
 */
@Data
@Accessors(chain =true)
public class MatchOpenFile implements Serializable {

    // id
    @Id
    private String id;

    // open-source project version name
    private String version;

    // component version id
    private String versionId;

    // open-source project name
    private String pName;

    // open-source project id
    private String pId;

    // feature similarity with the file under test
    private Float featureSimilarity;

    // open-source URL
    private String sourceUrl;

    // full path of the open-source file
    private String sourceFilePath;

    // open-source license types
    private List<String> licenseType;

    // file size
    private Integer fileSize;

    // file MD5
    private String md5;

    // analysis type (0: file, 1: function, 2: code block, 3: line)
    private int analyzeType;

    // open-source ratio of the file
    private Float openRate;

}
@ -0,0 +1,58 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.io.Serializable; |
||||
import java.util.List; |
||||
|
||||
/**
 * File open-source matching result (compose-analysis result),
 * mapped to the "match_open_file" Mongo collection.
 *
 * @author liuzongren
 * @version 1.0
 * @datetime 2024-07-23 18:05
 */
@Data
@Document(collection = "match_open_file")
@Accessors(chain =true)
public class MatchOpenFileMongoDto implements Serializable {

    @Id
    private String id;

    /**
     * File name.
     */
    private String fileName;

    /**
     * File path.
     */
    @Deprecated
    private String filePath;

    /**
     * Whether the file is open source (false: no, true: yes).
     */
    private Boolean openType;

    /**
     * Open-source ratio of the file.
     */
    private float openRate;

    /**
     * Feature similarity.
     */
    private Float featureSimilarity;

    /**
     * Matched open-source files.
     */
    List<MatchOpenFile> matchOpenFile;

}
@ -0,0 +1,50 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.io.Serializable; |
||||
import java.util.List; |
||||
|
||||
/**
 * Matched open-source project info, mapped to the "match_open_project" Mongo collection.
 *
 * @author liuzongren
 * @version 1.0
 * @datetime 2024-07-26 18:05
 */
@Data
@Document(collection = "match_open_project")
@Accessors(chain =true)
public class MatchOpenProjectMongoDto implements Serializable {

    @Id
    private String id;

    // open-source project version
    private String version;

    // open-source project id
    private String projectId;

    // open-source project name
    private String projectName;

    // number of files in the open-source project
    private Integer projectFileNum;

    // number of matched open-source project files
    private Integer matchFileNum;

    // open-source URL
    private String sourceUrl;

    // MD5 values of the matched open-source project files
    private List<String> matchFilesMd5;

    // similarity with the open-source project: matchFileNum / projectFileNum
    private Double similarity;

}
@ -0,0 +1,75 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
|
||||
import java.util.List; |
||||
|
||||
|
||||
/**
 * Open-source project matched by the current project,
 * mapped to the "project_assembly" Mongo collection.
 *
 * @author liuzongren
 * @date 2024/7/9
 */
@Document(collection = "project_assembly")
@Data
@Accessors(chain = true)
public class ProjectAssemblyMongoDto {

    /**
     * Unique MongoDB document identifier.
     */
    @Id
    private String id;

    /**
     * Number of files in the project.
     */
    private Integer fileCount;

    /**
     * Number of matched open-source files.
     */
    private Integer matchFileCount;

    /**
     * Version id of the matched open-source project.
     */
    private String versionId;

    /**
     * Version name of the matched open-source project.
     */
    private String versionName;

    /**
     * Id of the matched project on the open-source site.
     */
    private String projectId;

    /**
     * Name of the matched open-source project.
     */
    private String projectName;

    /**
     * URL of the open-source project.
     */
    private String openSourceUrl;

    /**
     * Overall similarity between the artifact under test and this project.
     */
    private Double semblance;

    /**
     * Licenses of the open-source project.
     */
    private List<String> licenseType;

}
@ -0,0 +1,75 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
import org.springframework.data.mongodb.core.mapping.Field; |
||||
|
||||
import java.io.Serializable; |
||||
|
||||
/**
 * <p>
 * Basic open-source project information, mapped to the "PROJECTBASEDATA" collection.
 * </p>
 *
 * @author liuzongren
 * @since 2024-07-23
 */
@Data
@Document(collection = "PROJECTBASEDATA")
public class ProjectBaseDataMongoDto implements Serializable {

    private static final long serialVersionUID = 1L;

    @Id
    private String id;

    /**
     * Project id.
     */
    @Field("ID")
    private String projectId;

    /**
     * Project name.
     */
    @Field("NAME")
    private String name;

    /**
     * Project type.
     */
    @Field("TYPE")
    private String type;

    /**
     * Number of stars of the open-source project.
     */
    @Field("STARS")
    private String stars;

    /**
     * Project description.
     */
    @Field("DESCRIBE")
    private String describe;

    /**
     * Project URL.
     */
    @Field("URL")
    private String url;

    /**
     * License type.
     */
    @Field("LICENSETYPE")
    private String licenseType;

    /**
     * Creation time.
     */
    @Field("CREATE_TIME")
    private String createTime;

}
@ -0,0 +1,75 @@ |
||||
package com.keyware.composeanalysis.mongo; |
||||
|
||||
import lombok.Data; |
||||
import lombok.EqualsAndHashCode; |
||||
import org.springframework.data.annotation.Id; |
||||
import org.springframework.data.mongodb.core.mapping.Document; |
||||
import org.springframework.data.mongodb.core.mapping.Field; |
||||
|
||||
import java.io.Serializable; |
||||
|
||||
/**
 * <p>
 * Version information of an open-source project,
 * mapped to the "VERSIONBASEDATA" collection.
 * </p>
 *
 * @author liuzongren
 * @since 2024-07-23
 */
@Data
@EqualsAndHashCode(callSuper = false)
@Document(collection = "VERSIONBASEDATA")
public class VersionbasedataMongoDto implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * Primary key.
     */
    @Id
    private String id;

    /**
     * Version id.
     */
    @Field("ID")
    private String versionId;

    /**
     * Project id (PID).
     */
    @Field("PID")
    private String projectId;

    /**
     * Version name.
     */
    @Field("NAME")
    private String versionName;

    /**
     * Version download URL.
     */
    @Field("DOWNURL")
    private String downloadUrl;

    /**
     * Relative path of the project.
     */
    @Field("PATH")
    private String path;

    /**
     * Creation time.
     */
    @Field("CREATE_TIME")
    private String createTime;

    /**
     * Version description.
     */
    @Field("DESCRIBE")
    private String description;

}
@ -0,0 +1,68 @@ |
||||
//package com.keyware.composeanalysis.schedule;
|
||||
//
|
||||
//import cn.hutool.core.date.DateUnit;
|
||||
//import cn.hutool.core.date.DateUtil;
|
||||
//import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
//import com.keyware.common.constant.enums.AnalysisStatusEnum;
|
||||
//import com.keyware.composeanalysis.constant.MongoDBConst;
|
||||
//import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum;
|
||||
//import com.keyware.composeanalysis.entity.AnalysisTask;
|
||||
//import com.keyware.composeanalysis.mongo.FileDataMongoDto;
|
||||
//import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl;
|
||||
//import com.keyware.composeanalysis.util.AnalysisLogUtil;
|
||||
//import com.mongodb.client.MongoClient;
|
||||
//import jakarta.annotation.Resource;
|
||||
//import lombok.extern.log4j.Log4j2;
|
||||
//import org.springframework.context.annotation.Configuration;
|
||||
//import org.springframework.data.mongodb.core.MongoTemplate;
|
||||
//import org.springframework.data.mongodb.core.query.Query;
|
||||
//import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
//import org.springframework.scheduling.annotation.Scheduled;
|
||||
//
|
||||
//import java.util.List;
|
||||
//
|
||||
//import static org.springframework.data.mongodb.core.query.Criteria.where;
|
||||
//
|
||||
///**
|
||||
// * 定时检测 分析任务是否完成
|
||||
// */
|
||||
//@Log4j2
|
||||
//@EnableScheduling
|
||||
//@Configuration
|
||||
//public class AnalysisStatusSchedule {
|
||||
//
|
||||
// @Resource
|
||||
// private AnalysisTaskServiceImpl taskService;
|
||||
//
|
||||
// @Resource
|
||||
// private MongoClient mongoClient;
|
||||
//
|
||||
// /**
|
||||
// * 定时查询任务库 ,看是否存在已经分析完成的任务,如果存在 变更任务的状态
|
||||
// */
|
||||
// @Scheduled(cron = "*/1 * * * * ?") // 每五秒钟执行一次
|
||||
// public void startTask() {
|
||||
// //查询正在进行成分分析的任务
|
||||
// LambdaQueryWrapper<AnalysisTask> taskQueryWrapper = new LambdaQueryWrapper<>();
|
||||
// taskQueryWrapper.eq(AnalysisTask::getAnalysisStatus, AnalysisStatusEnum.ANALYSISING.getCode());
|
||||
// taskQueryWrapper.eq(AnalysisTask::getDecompressionFlag,true);
|
||||
// taskQueryWrapper.eq(AnalysisTask::getComposeFlag,AnalysisStatusEnum.ANALYSISING.getCode());
|
||||
// List<AnalysisTask> composeAnalysisTasks = taskService.list(taskQueryWrapper);
|
||||
//
|
||||
// //循环遍历任务状态
|
||||
// for (AnalysisTask composeTask : composeAnalysisTasks) {
|
||||
// MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX+composeTask.getId());
|
||||
//
|
||||
// Query fileQuery = new Query(where("isDirectory").is(false)
|
||||
// .and("fileAnalysisStatus").in(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode(),FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()));
|
||||
// Long finishedAnalysisFileCount = mongoTemplate.count(fileQuery, FileDataMongoDto.class);
|
||||
//
|
||||
// //所有文件分析完毕,将成分分析的状态 更改为已完成
|
||||
// if (finishedAnalysisFileCount.intValue() == composeTask.getFileCount()){
|
||||
// composeTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode());
|
||||
// taskService.updateById(composeTask);
|
||||
// AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ DateUtil.between(composeTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒");
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
@ -0,0 +1,56 @@ |
||||
package com.keyware.composeanalysis.service; |
||||
|
||||
import com.baomidou.mybatisplus.extension.service.IService; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.response.AnalysisResp; |
||||
import org.springframework.web.bind.annotation.GetMapping; |
||||
import org.springframework.web.bind.annotation.PathVariable; |
||||
import org.springframework.web.bind.annotation.PostMapping; |
||||
|
||||
/**
 * <p>
 * Compose-analysis task service.
 * </p>
 *
 * @author liuzongren
 * @since 2024-07-23
 */
public interface AnalysisTaskService extends IService<AnalysisTask> {

    /**
     * Runs the compose analysis for the given task.
     *
     * @param analysisTask the task to analyze
     * @throws InterruptedException if the analysis is interrupted
     */
    void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException;

    /**
     * Stops or pauses an analysis task.
     *
     * @param taskId task id
     * author liuzongren
     */
    void stopComposeAnalysisTask(String taskId);

    /**
     * Restarts an analysis task from scratch.
     *
     * @param taskId task id
     * @return whether the restart succeeded
     * author liuzongren
     */
    Boolean restartComposeAnalysisTask(String taskId);

    /**
     * Resumes a paused analysis task.
     *
     * @param analysisTask the task to resume
     * author liuzongren
     */
    void recoveryComposeAnalysisTask(AnalysisTask analysisTask);

}
@ -0,0 +1,215 @@ |
||||
package com.keyware.composeanalysis.service.impl; |
||||
|
||||
import cn.hutool.core.date.DateUnit; |
||||
import cn.hutool.core.date.DateUtil; |
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; |
||||
import com.keyware.common.constant.RedisConst; |
||||
import com.keyware.common.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.constant.MongoDBConst; |
||||
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mapper.AnalyzeTaskMapper; |
||||
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
||||
import com.keyware.composeanalysis.mongo.ProjectAssemblyMongoDto; |
||||
import com.keyware.composeanalysis.service.AnalysisTaskService; |
||||
import com.keyware.composeanalysis.task.*; |
||||
import com.keyware.composeanalysis.util.AnalysisLogUtil; |
||||
import com.keyware.composeanalysis.util.RedisUtil; |
||||
import com.keyware.composeanalysis.util.SolrUtils; |
||||
import com.mongodb.client.MongoClient; |
||||
import jakarta.annotation.Resource; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.collections.CollectionUtils; |
||||
import org.springframework.core.task.TaskExecutor; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.data.mongodb.core.query.Query; |
||||
import org.springframework.data.mongodb.core.query.Update; |
||||
import org.springframework.scheduling.annotation.Async; |
||||
import org.springframework.stereotype.Service; |
||||
|
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.util.List; |
||||
import java.util.concurrent.CountDownLatch; |
||||
|
||||
import static org.springframework.data.mongodb.core.query.Criteria.where; |
||||
|
||||
|
||||
/** |
||||
* <p> |
||||
* 成分分析服务实现类 |
||||
* </p> |
||||
* |
||||
* @author liuzongren |
||||
* @since 2024-07-23 |
||||
*/ |
||||
@Log4j2 |
||||
@Service |
||||
public class AnalysisTaskServiceImpl extends ServiceImpl<AnalyzeTaskMapper, AnalysisTask> implements AnalysisTaskService { |
||||
|
||||
@Resource |
||||
private MongoClient mongoClient; |
||||
|
||||
@Resource |
||||
private SolrUtils solrUtils; |
||||
|
||||
@Resource |
||||
private TaskExecutor taskExecutor; |
||||
|
||||
@Resource |
||||
private RedisUtil redisUtil; |
||||
|
||||
@Override |
||||
@Async |
||||
public void doComposeAnalyze(AnalysisTask analysisTask) throws InterruptedException { |
||||
long startTime = System.currentTimeMillis(); |
||||
log.info("开始成份分析,taskName:{}",analysisTask.getFileName()); |
||||
//校验文件压缩是否完成
|
||||
retryGetDecompressionFlag(analysisTask); |
||||
|
||||
//开始分析前,将成分分析的状态为 进行中
|
||||
analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSISING.getCode()); |
||||
this.updateById(analysisTask); |
||||
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); |
||||
AnalysisLogUtil.insert(mongoTemplate, "【成分分析】开始:" + analysisTask.getFileName()); |
||||
|
||||
//首先进行项目级别的分析,将所有文件的源MD5批量去solr库中匹配
|
||||
PorjectAnalysisTask projectAnalysisTask = new PorjectAnalysisTask(mongoClient, analysisTask, solrUtils, this); |
||||
projectAnalysisTask.doAnalysis(); |
||||
|
||||
//项目级的分析完成后,没有匹配中的文件,根据分析的级别,对每个文件进行相应级别的分析
|
||||
analysisFile(mongoTemplate,analysisTask); |
||||
|
||||
//成份分析完成后,查询所有开源文件,判断当前项目是否开源
|
||||
checkProjectIfOpen(mongoTemplate,analysisTask); |
||||
|
||||
//修改成分分析状态为完成
|
||||
analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode()); |
||||
this.updateById(analysisTask); |
||||
|
||||
//插入分析日志
|
||||
AnalysisLogUtil.insert(mongoTemplate,"【成分分析】已完成,耗时:"+ DateUtil.between(analysisTask.getAnalysisStartTime(),DateUtil.date(), DateUnit.SECOND) +"秒"); |
||||
log.info("成份分析完成,taskName:{},耗时:{}",analysisTask.getFileName(),(System.currentTimeMillis()-startTime)/1000 +"秒"); |
||||
} |
||||
|
||||
@Override |
||||
public void stopComposeAnalysisTask(String taskId) { |
||||
//将成分分析的任务状态的标志位置为暂停,让线程池中的排队的任务队列停止分析
|
||||
redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, taskId), AnalysisStatusEnum.ANALYSIS_PAUSED.getCode()); |
||||
} |
||||
|
||||
@Override |
||||
public Boolean restartComposeAnalysisTask(String taskId) { |
||||
boolean result = false; |
||||
try { |
||||
//删除匹配的开源项目信息
|
||||
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + taskId); |
||||
mongoTemplate.remove(ProjectAssemblyMongoDto.class); |
||||
|
||||
//删除项目匹配的开源文件
|
||||
mongoTemplate.remove(MatchOpenFileMongoDto.class); |
||||
|
||||
//将文件分析状态设置为未开始分析
|
||||
mongoTemplate.update(FileDataMongoDto.class) |
||||
.apply(new Update().set("openType", false) |
||||
.set("fileAnalysisStatus", FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode())) |
||||
.all(); |
||||
|
||||
//重新开始分析任务
|
||||
doComposeAnalyze(getById(taskId)); |
||||
result = true; |
||||
} catch (Exception e) { |
||||
log.error("重新分析失败", e); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
@Override |
||||
@Async |
||||
public void recoveryComposeAnalysisTask(AnalysisTask analysisTask) { |
||||
/** |
||||
* todo 这里存在一个逻辑缺陷 |
||||
* 项目级别的分析是无法终止的,当前任务恢复恢复的是文件级的成分分析,如果文件级的没有分析完成,这里可能会将所有文件进行文件级别的分析 |
||||
*/ |
||||
try { |
||||
//将成分分析的任务状态的标志位置改为进行中
|
||||
redisUtil.set(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()), AnalysisStatusEnum.ANALYSISING.getCode()); |
||||
|
||||
MongoTemplate mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId()); |
||||
//项目级的分析完成后
|
||||
Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").ne(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode()) |
||||
.and("isDirectory").is(false)); |
||||
List<FileDataMongoDto> unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class); |
||||
|
||||
if (CollectionUtils.isNotEmpty(unAnalyzedFiles)){ |
||||
//使用线程池 并行的分析文件
|
||||
CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size()); |
||||
unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> { |
||||
IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch); |
||||
taskExecutor.execute(task); |
||||
}); |
||||
countDownLatch.await(); |
||||
//修改成分分析状态为完成
|
||||
analysisTask.setComposeFlag(AnalysisStatusEnum.ANALYSIS_DONE.getCode()); |
||||
this.updateById(analysisTask); |
||||
AnalysisLogUtil.insert(mongoTemplate,"成分分析已完成,耗时:"+ DateUtil.between(analysisTask.getCreateTime(),DateUtil.date(), DateUnit.SECOND) +"秒"); |
||||
} |
||||
} catch (Exception e) { |
||||
log.error("恢复分析失败", e); |
||||
} |
||||
} |
||||
|
||||
|
||||
//引入解压缩有可能会很慢,这里添加重试机制,最多重试6次,60s
|
||||
private boolean retryGetDecompressionFlag(AnalysisTask analysisTask) { |
||||
int retryCount = 0; |
||||
while (retryCount < 60) { |
||||
AnalysisTask latestAnalysisTask = this.getById(analysisTask.getId()); |
||||
if (latestAnalysisTask.getDecompressionFlag()) { |
||||
analysisTask.setDecompressionFlag(true); |
||||
analysisTask.setFileCount(latestAnalysisTask.getFileCount()); |
||||
return true; |
||||
} |
||||
try { |
||||
Thread.sleep(1000); |
||||
} catch (InterruptedException e) { |
||||
log.error("线程休眠异常", e); |
||||
} |
||||
retryCount++; |
||||
} |
||||
return false; |
||||
} |
||||
|
||||
|
||||
//开启单个文件的分析
|
||||
private void analysisFile(MongoTemplate mongoTemplate,AnalysisTask analysisTask) throws InterruptedException { |
||||
Query unAnalyzedFileQuery = new Query(where("fileAnalysisStatus").is(FileAnalysisStatusEnum.UN_START_ANALYSIS.getCode()) |
||||
.and("isDirectory").is(false)); |
||||
List<FileDataMongoDto> unAnalyzedFiles = mongoTemplate.find(unAnalyzedFileQuery, FileDataMongoDto.class); |
||||
|
||||
//使用线程池 并行的分析
|
||||
CountDownLatch countDownLatch = new CountDownLatch(unAnalyzedFiles.size()); |
||||
unAnalyzedFiles.parallelStream().forEach(fileDataMongoDto -> { |
||||
IAnalysisTask task = AnalysisTaskFactory.createAnalysisTask(analysisTask, fileDataMongoDto, mongoTemplate, countDownLatch); |
||||
taskExecutor.execute(task); |
||||
}); |
||||
countDownLatch.await(); |
||||
} |
||||
|
||||
//校验当前项目是否开源
|
||||
private void checkProjectIfOpen(MongoTemplate mongoTemplate,AnalysisTask analysisTask){ |
||||
Query openFileQuery = new Query(where("openType").is(true)); |
||||
Long openFilesCount = mongoTemplate.count(openFileQuery, FileDataMongoDto.class); |
||||
//是否开源阈值
|
||||
Integer openThread = analysisTask.getOpenRateThreshold(); |
||||
BigDecimal totalFileCount = new BigDecimal(analysisTask.getFileCount()); |
||||
//统计开源率
|
||||
BigDecimal openRate = new BigDecimal(openFilesCount).divide(totalFileCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); |
||||
//超过阈值认为开源
|
||||
if (openRate.compareTo(new BigDecimal(openThread)) >= 0) { |
||||
analysisTask.setOpenType(true); |
||||
} |
||||
} |
||||
|
||||
} |
@ -0,0 +1,56 @@ |
||||
package com.keyware.composeanalysis.solr; |
||||
|
||||
import lombok.Data; |
||||
import lombok.experimental.Accessors; |
||||
|
||||
import java.util.Arrays; |
||||
import java.util.List; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/26 |
||||
* @Description solr 库 VersionTree树信息 ,保存项目的整体信息 |
||||
*/ |
||||
@Data |
||||
@Accessors(chain = true) |
||||
public class VersionTree { |
||||
|
||||
/** |
||||
* 项目ID |
||||
*/ |
||||
private String proId; |
||||
|
||||
/** |
||||
* 项目名称 |
||||
*/ |
||||
private String proName; |
||||
|
||||
/** |
||||
* 项目编号 |
||||
*/ |
||||
private String versionId; |
||||
|
||||
/** |
||||
* 项目版本 |
||||
*/ |
||||
private String versionName; |
||||
|
||||
/** |
||||
* 项目地址 |
||||
*/ |
||||
private String downUrl; |
||||
|
||||
public void setLicenseType(String licenseType) { |
||||
if (licenseType != null){ |
||||
this.licenseType = Arrays.asList(licenseType.split("@@@")); |
||||
} |
||||
} |
||||
|
||||
private List<String> licenseType; |
||||
|
||||
/** |
||||
* 项目所有文件的信息 |
||||
*/ |
||||
private List<VersionTreeNode> dirTree; |
||||
|
||||
} |
@ -0,0 +1,42 @@ |
||||
package com.keyware.composeanalysis.solr; |
||||
|
||||
import lombok.Data; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/26 |
||||
*/ |
||||
@Data |
||||
public class VersionTreeNode { |
||||
|
||||
/** |
||||
* 节点编号 |
||||
*/ |
||||
private String id; |
||||
|
||||
/** |
||||
* 父节点编号 |
||||
*/ |
||||
private String pid; |
||||
|
||||
/** |
||||
* 文件名称 |
||||
*/ |
||||
private String name; |
||||
|
||||
/** |
||||
* 是否是父节点 |
||||
*/ |
||||
private Boolean isParent; |
||||
|
||||
/** |
||||
* 文件的MD5 |
||||
*/ |
||||
private String sourceFileMd5; |
||||
|
||||
/** |
||||
* 文件在项目中的相对路径 |
||||
*/ |
||||
private String fullPath; |
||||
|
||||
} |
@ -0,0 +1,45 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
|
||||
import java.util.concurrent.CountDownLatch; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/31 |
||||
* @description |
||||
*/ |
||||
public class AnalysisTaskFactory { |
||||
|
||||
/** |
||||
* 根据分析类型,创建具体的分析任务 |
||||
* @param analysisTask |
||||
* @param analysisFile |
||||
* @param mongoTemplate |
||||
* @param countDownLatch 任务总数控制器 |
||||
* @return |
||||
*/ |
||||
|
||||
public static IAnalysisTask createAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) { |
||||
AnalysisLevelEnum analysisLevel = AnalysisLevelEnum.getAnalysisLevelEnum(analysisTask.getAnalysisLevel()); |
||||
switch (analysisLevel) { |
||||
case FILE_LEVEL: |
||||
return new FileAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); |
||||
case FUNCTION_LEVEL: |
||||
return new FunctionAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); |
||||
case BLOCK_LEVEL: |
||||
return new CodeBlockAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); |
||||
case LINE_LEVEL: |
||||
return new LineAnalysisTask(analysisTask,analysisFile,mongoTemplate,countDownLatch); |
||||
default: |
||||
break; |
||||
} |
||||
return null; |
||||
} |
||||
|
||||
|
||||
|
||||
} |
@ -0,0 +1,356 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
|
||||
import cn.hutool.core.collection.CollectionUtil; |
||||
import cn.hutool.core.lang.Pair; |
||||
import com.alibaba.fastjson.JSONArray; |
||||
import com.keyware.composeanalysis.constant.FixedValue; |
||||
import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst; |
||||
import com.keyware.composeanalysis.constant.RedisConst; |
||||
import com.keyware.composeanalysis.constant.SolrDBConst; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.LineDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import com.keyware.composeanalysis.util.*; |
||||
import com.keyware.keyswan.anaysis.Analysis; |
||||
import com.keyware.keyswan.anaysis.AnalysisFactory; |
||||
import com.keyware.keyswan.common.CodeFile; |
||||
import com.keyware.keyswan.common.LineModel; |
||||
import com.keyware.utils.IdGenerator; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.lang3.StringUtils; |
||||
import org.apache.solr.common.SolrDocument; |
||||
import org.apache.solr.common.SolrDocumentList; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.data.mongodb.core.query.Update; |
||||
|
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.util.*; |
||||
import java.util.concurrent.CountDownLatch; |
||||
import java.util.stream.Collectors; |
||||
import java.util.stream.Stream; |
||||
|
||||
import static com.keyware.composeanalysis.util.SimilarityUtil.getOpenRateAndSaveRowNum; |
||||
import static org.springframework.data.mongodb.core.query.Criteria.where; |
||||
|
||||
/**
 * Code-block-level provenance (open-source traceability) analysis task:
 * extracts the code-block features of one file, matches them against the
 * solr feature library, and computes the file's feature similarity and
 * open-source rate.
 *
 * @author liuzongren
 * @version 1.0
 * @since 2024-07-25
 */
@Log4j2
public class CodeBlockAnalysisTask extends IAnalysisTask {

    private MongoTemplate mongoTemplate;
    private AnalysisTask analysisTask;
    // File under test.
    private FileDataMongoDto analysisFile;

    private SolrUtils solrUtils;

    private RedisUtil redisUtil;

    private CountDownLatch countDownLatch;

    public CodeBlockAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
        this.mongoTemplate = mongoTemplate;
        this.analysisTask = analysisTask;
        this.analysisFile = analysisFile;
        this.countDownLatch = countDownLatch;
        this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
        this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
    }

    /**
     * Function / code-block level source provenance.
     * Must run after the file-level analysis has completed.
     */
    @Override
    public void run() {
        // Before doing any work, check whether the task has been stopped or paused.
        Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
        if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
            log.info("任务已取消,fileName:{}", analysisFile.getName());
            countDownLatch.countDown();
            return;
        }

        // File path on disk.
        String filePath = analysisFile.getFileUrl();
        // File name.
        String fileName = analysisFile.getName();

        try {
            LineDataMongoDto lineDataMongoDto = new LineDataMongoDto();
            lineDataMongoDto.setFileId(analysisFile.getId());
            Analysis analysis = AnalysisFactory.getAnalysis(filePath);
            // Extract the code-block feature information of the file.
            CodeFile codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT_BY_6_LINE);
            List<LineModel> lineFeatures = codeFile.getLine_hay();

            // The solr feature core to query is chosen by file suffix.
            String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());

            // Fetch open-source files with similar features from solr.
            SolrDocumentList matchOpenSourceFiles = getFeatureSimilarityFromSolr(featureCoreName, lineFeatures);

            // Compute the open-source rate.
            doAnalysis(matchOpenSourceFiles, codeFile);

            // Mark this file's analysis status as done.
            analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
            mongoTemplate.update(FileDataMongoDto.class)
                    .matching(where("_id").is(analysisFile.getId()))
                    .replaceWith(analysisFile)
                    .findAndReplace();

            AnalysisLogUtil.insert(mongoTemplate, "【代码块级分析】完成" + fileName);
            log.info("文件" + fileName + ":代码块级分析完成");
        } catch (Exception e) {
            AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【代码块分析】失败" + fileName, e);
            log.error("文件:" + fileName + "代码块级分析失败!", e);
            // Mark this file's analysis status as failed.
            mongoTemplate.update(FileDataMongoDto.class)
                    .matching(where("_id").is(analysisFile.getId()))
                    .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
                    .first();
        } finally {
            countDownLatch.countDown();
        }
    }


    /**
     * Queries the solr feature library for open-source files sharing any of
     * this file's function/code-block feature MD5s.
     *
     * @param solrCoreName              the solr core to query
     * @param functionAndCodeBlockInfos this file's extracted code-block features
     * @return matching solr documents with fields sourceMd5 and line_hay
     */
    private SolrDocumentList getFeatureSimilarityFromSolr(String solrCoreName, List<LineModel> functionAndCodeBlockInfos) {
        // Collect both the trait-line and cut-line MD5s of every feature block.
        Set<String> traitLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getTraitLineMd5).collect(Collectors.toSet());
        Set<String> cuttLineMd5Arr = functionAndCodeBlockInfos.stream().map(LineModel::getCutLineMd5).collect(Collectors.toSet());
        Set<String> queryMd5Arr = Stream.concat(traitLineMd5Arr.stream(), cuttLineMd5Arr.stream()).collect(Collectors.toSet());
        String queryStr = "line_hay:(" + StringUtils.join(queryMd5Arr, " OR ") + ")";
        log.info("查询条件: solrCoreName:{},queryStr:{}", solrCoreName, queryStr);
        SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,line_hay");
        log.info("查询结果: result:{}", result);
        return result;
    }


    /**
     * Computes the file's open-source rate and feature similarity against the
     * matched open-source files, then persists the match record to mongo.
     *
     * @param matcheOpenSourceFiles matched open-source file documents
     * @param fileAnalysisRes       the parsed analysis result of the file under test
     */
    private void doAnalysis(SolrDocumentList matcheOpenSourceFiles, CodeFile fileAnalysisRes) {

        if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) {
            return;
        }

        // The file-version solr core to query is chosen by file suffix.
        String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());


        // All matched feature code-block MD5s, used to total the matched line count.
        Set<String> matchingTraitLineSet = new HashSet<>();

        // All matched open-source row numbers across every matched file.
        Set<Integer> matchedLineRowsNum = new HashSet<>();

        // Per-open-source-file similarity/open-rate statistics.
        List<MatchOpenFile> matchOpenFilesRes = calculateSimilarityAndOpenRate(matcheOpenSourceFiles, fileAnalysisRes, sourceFileBaseCoreName, matchedLineRowsNum, matchingTraitLineSet);

        // Overall feature similarity of the file under test.
        Map<String, LineModel> traitMd5Map = fileAnalysisRes.getLine_hay().stream().collect(Collectors.toMap(LineModel::getTraitLineMd5, java.util.function.Function.identity()));

        int matchCodeBlockLineCount = 0;
        for (String matchFeatureFunctionMd5 : matchingTraitLineSet) {
            LineModel lineModel = traitMd5Map.get(matchFeatureFunctionMd5);
            // NOTE(review): end - start (no +1) here, while
            // calculateSimilarityAndOpenRate uses end - start + 1 — confirm which
            // line-count convention is intended.
            matchCodeBlockLineCount += (Integer.valueOf(lineModel.getEndLine()) - Integer.valueOf(lineModel.getStartLine()));
        }

        BigDecimal featureSimilarity = new BigDecimal(matchCodeBlockLineCount).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

        // Overall open-source rate: matched row count over the file's code row count.
        BigDecimal openRate = new BigDecimal(matchedLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

        // Configured open-source rate threshold.
        Integer openRateThreshold = analysisTask.getOpenRateThreshold();

        // At or above the threshold the file counts as open source.
        if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
            analysisFile.setOpenType(true);
        }

        // Persist the file's open-source match record to mongo.
        MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
        matchOpenFileMongo.setId(IdGenerator.uuid32())
                .setFilePath(analysisFile.getFileUrl())
                .setFileName(analysisFile.getName())
                .setFeatureSimilarity(featureSimilarity.floatValue())
                .setOpenRate(openRate.floatValue())
                .setOpenType(analysisFile.getOpenType())
                .setMatchOpenFile(matchOpenFilesRes);

        mongoTemplate.save(matchOpenFileMongo);
    }


    /**
     * Computes, for every matched open-source file, its feature similarity and
     * open-source rate against the file under test.
     *
     * @param matchOpenFiles            open-source files matched by feature MD5
     * @param fileAnalysisRes           the parsed analysis result of the file under test
     * @param sourceFileBaseCoreName    solr core holding version info for this suffix
     * @param matchLineRowsNum          out-param: matched open-source row numbers (accumulated)
     * @param matchFeatureCodeBlockMd5s out-param: matched feature code-block MD5s (accumulated)
     * @return per-open-source-file match records
     */
    private List<MatchOpenFile> calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set<Integer> matchLineRowsNum, Set<String> matchFeatureCodeBlockMd5s) {

        List<MatchOpenFile> matchOpenFilesRes = new ArrayList<>();

        // First resolve each open-source file's version id and path by its MD5.
        Set<String> openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet());
        Map<String, SolrDocument> md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s);

        // Then resolve the version details by version id.
        // TODO (original author): resolving one version hits two solr cores plus
        // versionTree — needs optimization.
        Set<String> openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
        List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
        Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));


        for (SolrDocument matchFile : matchOpenFiles) {

            // Open-source file MD5.
            String openSourceFileMd5 = matchFile.getFieldValue("sourceMd5").toString();

            // Parse the open-source file's code-block feature list.
            List<LineModel> openFileCodeBlockFeatureList = getOpenFileCodeBlockList(matchFile);

            // Total matched feature line count for this open-source file.
            int currentFileMatchFeatureLineCount = 0;

            // Walk this file's code-block features and count the matched lines.
            for (LineModel lineModel : fileAnalysisRes.getLine_hay()) {
                String traitLineMd5 = lineModel.getTraitLineMd5();
                // Look the block up in the matched open-source file's features.
                for (LineModel matchLine : openFileCodeBlockFeatureList) {
                    if (traitLineMd5.equals(matchLine.getTraitLineMd5())) {
                        // Accumulate the matched feature line count (inclusive span).
                        currentFileMatchFeatureLineCount += (Integer.valueOf(matchLine.getEndLine()) - Integer.valueOf(matchLine.getStartLine()) + 1);
                        matchFeatureCodeBlockMd5s.add(traitLineMd5);
                    }
                }
            }


            // The source-code core index is sharded by the first MD5 character.
            String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;

            // Fetch the open-source file's text content.
            SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent");

            // Open-source rate of the file under test against this open-source file,
            // plus the matched row numbers.
            Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = getOpenRateAndSaveRowNum(fileAnalysisRes.getSourceFileContent(), openSourceContent.getFieldValue("sourceContent").toString());

            // Accumulate matched row numbers for the overall open-source rate.
            matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());

            // Feature similarity against this open-source file.
            BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(fileAnalysisRes.getCodeRowNum(), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

            SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
            VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));

            // Assemble this open-source file's project/version match record.
            MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
            matchOpenFileInfo.setPId(versionInfo.getProId())
                    .setPName(versionInfo.getProName())
                    .setSourceUrl((String) openEntries.get("fullPath"))
                    .setFeatureSimilarity(featureSimilarity.floatValue())
                    .setOpenRate(openRateAndSaveRowNum.getKey())
                    .setVersion(versionInfo.getVersionName())
                    .setLicenseType(versionInfo.getLicenseType())
                    .setAnalyzeType(AnalysisLevelEnum.BLOCK_LEVEL.getCode());
            matchOpenFilesRes.add(matchOpenFileInfo);
        }
        return matchOpenFilesRes;
    }


    /**
     * Parses the code-block feature list out of an open-source file's solr
     * document (the line_hay field is JSON with escaped quotes).
     *
     * @param openSourceFile the matched open-source solr document
     * @return the parsed code-block feature list
     */
    private List<LineModel> getOpenFileCodeBlockList(SolrDocument openSourceFile) {
        // Un-escape the JSON stored in the line_hay field before parsing.
        String lineFeatureMd5s = (String) openSourceFile.get("line_hay");
        lineFeatureMd5s = lineFeatureMd5s.replace("\\", "")
                .replace("\"{", "{")
                .replace("}\"", "}");
        return JSONArray.parseArray(lineFeatureMd5s, LineModel.class);
    }


    /**
     * Persists the extracted features to mongo in batches of 10 (large feature
     * sets could exceed a single mongo document's size limit).
     *
     * @param features         the feature list to store
     * @param lineDataMongoDto carrier document for the current analysis task
     * @deprecated kept from the original author; marked "todo: check whether
     *             inserting these is still necessary".
     */
    @Deprecated
    private void insertFeatureValue(List<LineModel> features, LineDataMongoDto lineDataMongoDto) {
        List<LineModel> batchInsertList = new ArrayList<>();
        if (CollectionUtil.isNotEmpty(features)) {
            // Batch size of 10; flush every full batch and once more at the tail.
            int batchInsertStpe = 10;
            int total = 0;
            for (int i = 0; i < features.size(); i++) {
                LineModel lineModel = features.get(i);
                if (total != batchInsertStpe) {
                    batchInsertList.add(lineModel);
                    total++;
                }
                if (i == features.size() - 1 && total != batchInsertStpe) {
                    total = 0;
                    lineDataMongoDto.setId(IdGenerator.uuid32())
                            .setLineModels(batchInsertList);
                    mongoTemplate.insert(lineDataMongoDto);
                }
                if (total == batchInsertStpe) {
                    total = 0;
                    lineDataMongoDto.setId(IdGenerator.uuid32())
                            .setLineModels(batchInsertList);
                    mongoTemplate.insert(lineDataMongoDto);
                    batchInsertList.clear();
                }
            }
        } else {
            lineDataMongoDto.setId(IdGenerator.uuid32());
            mongoTemplate.insert(lineDataMongoDto);
        }
    }

}
@ -0,0 +1,232 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
import com.keyware.composeanalysis.constant.FixedValue; |
||||
import com.keyware.composeanalysis.constant.RedisConst; |
||||
import com.keyware.composeanalysis.constant.SolrDBConst; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import com.keyware.composeanalysis.util.*; |
||||
import com.keyware.keyswan.anaysis.Analysis; |
||||
import com.keyware.keyswan.anaysis.AnalysisFactory; |
||||
import com.keyware.keyswan.common.CodeFile; |
||||
import com.keyware.utils.IdGenerator; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.collections.CollectionUtils; |
||||
import org.apache.commons.lang3.StringUtils; |
||||
import org.apache.solr.common.SolrDocument; |
||||
import org.apache.solr.common.SolrDocumentList; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.data.mongodb.core.query.Update; |
||||
|
||||
import java.io.IOException; |
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.nio.file.Files; |
||||
import java.nio.file.Paths; |
||||
import java.util.*; |
||||
import java.util.concurrent.CountDownLatch; |
||||
import java.util.function.Function; |
||||
import java.util.stream.Collectors; |
||||
|
||||
import static org.springframework.data.mongodb.core.query.Criteria.where; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/23 |
||||
* desc 文件级溯源分析任务 |
||||
*/ |
||||
@Log4j2 |
||||
public class FileAnalysisTask extends IAnalysisTask { |
||||
|
||||
private MongoTemplate mongoTemplate; |
||||
private AnalysisTask analysisTask; |
||||
private SolrUtils solrUtils; |
||||
//文件信息
|
||||
private FileDataMongoDto analysisFile; |
||||
private RedisUtil redisUtil; |
||||
private CountDownLatch countDownLatch; |
||||
|
||||
|
||||
    /**
     * Builds a file-level analysis task; solr and redis helpers are pulled from
     * the Spring context because tasks are constructed outside DI.
     *
     * @param analysisTask   the overall analysis task (configuration holder)
     * @param analysisFile   the file to analyze
     * @param mongoTemplate  mongo access for this task's database
     * @param countDownLatch latch counted down when this file finishes (success or failure)
     */
    public FileAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
        this.mongoTemplate = mongoTemplate;
        this.analysisTask = analysisTask;
        this.analysisFile = analysisFile;
        this.countDownLatch = countDownLatch;
        this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
        this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
    }
||||
|
||||
|
||||
    /**
     * File-level provenance analysis.
     * Runs after the project-level analysis: this file's source MD5 had no
     * match in solr, so file-level features are extracted and matched instead.
     */
    @Override
    public void run() {
        // Before doing any work, check whether the task has been stopped or paused.
        Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
        if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
            log.info("任务已取消,fileName:{}", analysisFile.getName());
            countDownLatch.countDown();
            return;
        }
        // Current file name.
        String fileName = analysisFile.getName();

        AnalysisLogUtil.insert(mongoTemplate, "【文件级分析】正在分析" + fileName);
        try {
            // Only the mainstream languages support feature extraction; other
            // suffixes were already covered by the MD5 match at project level.
            if (StringUtils.isNotEmpty(analysisFile.getSuffix()) && FixedValue.SUFFIX_SOLR_VERSION.containsKey(analysisFile.getSuffix())) {
                // Feature core (*_CutFileInfo) name, chosen by file suffix.
                String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());
                // Obtain a parser for this file type by name.
                Analysis analysis = AnalysisFactory.getAnalysis(fileName);
                // A null parser means this language's feature extraction is not yet
                // supported; the plain MD5 match done earlier is all we can do.
                if (analysis != null) {
                    // Skip file/line feature extraction for files larger than 3 MB.
                    Integer fileSize = analysisFile.getFileSize();
                    if (fileSize < (3 * 1024 * 1024)) {
                        CodeFile codeFile = analysis.analysisFile(analysisFile.getFileUrl(), "1", "0");
                        // Query the suffix-specific feature core with the extracted feature MD5s.
                        if (codeFile != null) {
                            String querySb = "sourceMd5:" + codeFile.getSourceMd5() + " OR cutFileMd5:" + codeFile.getCutFileMd5() + " OR traitFileMd5:" + codeFile.getTraitFileMd5();
                            SolrDocumentList openSourceFileList = solrUtils.query(featureCoreName, querySb, "sourceMd5");
                            // If anything matched in the source library, compute the open-source rate.
                            if (CollectionUtils.isNotEmpty(openSourceFileList)) {
                                ananlyzeFileOpenRate(openSourceFileList);
                            }
                        }
                    }
                }
            }
            // Persist the file-level analysis result (status: done).
            analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
            mongoTemplate.update(FileDataMongoDto.class)
                    .matching(where("_id").is(analysisFile.getId()))
                    .replaceWith(analysisFile)
                    .findAndReplace();
        } catch (Exception e) {
            AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【文件级】提取失败" + fileName, e);
            log.error("文件:" + fileName + "文件级别特征提取失败!", e);
            // Mark this file's analysis status as failed.
            analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode());
            // Persist the failed status.
            mongoTemplate.update(FileDataMongoDto.class)
                    .matching(where("_id").is(analysisFile.getId()))
                    .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
                    .first();
        } finally {
            countDownLatch.countDown();
        }
    }
||||
|
||||
|
||||
/** |
||||
* 分析文件的开源率 |
||||
* |
||||
* @param fileList 匹配的开源文件信息 |
||||
* @throws IOException |
||||
*/ |
||||
private void ananlyzeFileOpenRate(SolrDocumentList fileList) throws IOException { |
||||
//创建匹配开源文件信息匹配对象
|
||||
MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto(); |
||||
matchOpenFileInfo.setId(IdGenerator.uuid32()) |
||||
.setFileName(analysisFile.getName()) |
||||
.setFilePath(analysisFile.getFileUrl()); |
||||
|
||||
//根据匹配的开源文件的md5 获取版本ID
|
||||
Set<String> sourceFileMd5 = fileList.stream().map(solrDocument -> (String) solrDocument.get("sourceMd5")).collect(Collectors.toSet()); |
||||
String sourceCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); |
||||
Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceCoreName, sourceFileMd5); |
||||
|
||||
//根据版本ID获取版本信息
|
||||
Set<String> versionIds = md5VersionObjMap.values().stream().map(solrDocument -> (String) solrDocument.get("versionId")).collect(Collectors.toSet()); |
||||
List<VersionTree> treeInfoList = solrUtils.queryBatchVersionInfoByVersionIds(versionIds); |
||||
Map<String, VersionTree> versionIdMap = treeInfoList.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity())); |
||||
|
||||
//获取被测件文本内容
|
||||
String fileContent = new String(Files.readAllBytes(Paths.get(analysisFile.getFileUrl())), "utf-8").replaceAll(" ", ""); |
||||
|
||||
//将被测件的文本内容拆分成行信息,用于匹配开源信息
|
||||
List<String> fileLines = SimilarityUtil.getSplitWords(fileContent); |
||||
|
||||
HashSet<Integer> openLineNum = new HashSet<>(); |
||||
|
||||
//开源文件结果集合
|
||||
ArrayList<MatchOpenFile> matchOpenFileList = new ArrayList<>(); |
||||
//遍历匹配到的开源文件列表
|
||||
for (int i = 0; i < fileList.size(); i++) { |
||||
String openFileMd5 = (String) fileList.get(i).get("sourceMd5"); |
||||
SolrDocument versionObj = md5VersionObjMap.get(openFileMd5); |
||||
String versionId = (String) versionObj.get("versionId"); |
||||
VersionTree versionInfo = versionIdMap.get(versionId); |
||||
if (versionInfo == null) { |
||||
log.error("未在versionTree中找到版本信息,openFileMd5:{},versionId:{}",openFileMd5, versionId); |
||||
continue; |
||||
} |
||||
MatchOpenFile matchOpenFile = new MatchOpenFile(); |
||||
matchOpenFile.setId(IdGenerator.uuid32()) |
||||
.setVersionId(versionId) |
||||
.setSourceFilePath((String) versionObj.get("fullPath")) |
||||
.setSourceUrl(versionInfo.getDownUrl()) |
||||
.setPId(versionInfo.getProId()) |
||||
.setPName(versionInfo.getProName()) |
||||
.setLicenseType(versionInfo.getLicenseType()) |
||||
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()) |
||||
.setVersion(versionInfo.getVersionName()) |
||||
.setFeatureSimilarity(100.00f); |
||||
//计算被测件和开源文件的文本相似度
|
||||
//根据文件的MD5的第一位获取solr库索引名称
|
||||
String solrNameIndex =openFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; |
||||
SolrDocumentList sourceFileInfo = solrUtils.query(solrNameIndex, "sourceFileMd5:" + openFileMd5, "sourceContent"); |
||||
if (CollectionUtils.isNotEmpty(sourceFileInfo)) { |
||||
String openSourceContent = String.valueOf(sourceFileInfo.get(0).getFieldValue("sourceContent")); |
||||
//这里存在优化空间,被测件的文件行拆分 可以拿到循环外面
|
||||
double similarity = SimilarityUtil.getSimilarityAndSaveRowNum(fileLines, openSourceContent, openLineNum); |
||||
matchOpenFile.setOpenRate(new BigDecimal(similarity * 100).setScale(2, RoundingMode.HALF_UP).floatValue()); |
||||
//如果找不到源代码,直接将原文开源率置为 100%
|
||||
} else { |
||||
log.error("找不到源代码,DBname:{},sourceFileMd5:{}", solrNameIndex, openFileMd5); |
||||
matchOpenFile.setOpenRate(100.00f); |
||||
} |
||||
matchOpenFile.setMd5(openFileMd5); |
||||
matchOpenFileList.add(matchOpenFile); |
||||
} |
||||
//统计被测件的总体开源率
|
||||
//获取开源率阈值,判断当前文件是否开源
|
||||
Integer openRateThreshold = analysisTask.getOpenRateThreshold(); |
||||
int openLineCount = openLineNum.size(); |
||||
BigDecimal totalLineCount = new BigDecimal(fileLines.size()); |
||||
BigDecimal openRate = new BigDecimal(openLineCount).divide(totalLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); |
||||
|
||||
//超过阈值,则认为当前文件是开源文件
|
||||
if (openRate.compareTo(new BigDecimal(openRateThreshold)) > 0) { |
||||
analysisFile.setOpenType(true); |
||||
} else { |
||||
analysisFile.setOpenType(false); |
||||
} |
||||
|
||||
//修改保存测试文件信息
|
||||
analysisFile.setOpenLineCount(openLineCount) |
||||
.setOpenRate(openRate.floatValue()); |
||||
|
||||
//组装开源信息
|
||||
matchOpenFileInfo.setFilePath(analysisFile.getFileUrl()) |
||||
.setOpenType(analysisFile.getOpenType()) |
||||
.setOpenRate(analysisFile.getOpenType() ? 100.00f : 0.00f) |
||||
.setMatchOpenFile(matchOpenFileList); |
||||
|
||||
//保存当前开源信息数据
|
||||
mongoTemplate.insert(matchOpenFileInfo); |
||||
|
||||
} |
||||
|
||||
} |
@ -0,0 +1,409 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
|
||||
import cn.hutool.core.collection.CollectionUtil; |
||||
import cn.hutool.core.lang.Pair; |
||||
import cn.hutool.core.util.ObjUtil; |
||||
import com.alibaba.fastjson.JSONArray; |
||||
import com.keyware.composeanalysis.constant.FixedValue; |
||||
import com.keyware.composeanalysis.constant.RedisConst; |
||||
import com.keyware.composeanalysis.constant.SolrDBConst; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.LineDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import com.keyware.composeanalysis.util.*; |
||||
import com.keyware.keyswan.common.LineModel; |
||||
import com.keyware.keyware.anaysis.Analysis; |
||||
import com.keyware.keyware.anaysis.AnalysisFactory; |
||||
import com.keyware.keyware.common.CodeFile; |
||||
import com.keyware.keyware.common.Function; |
||||
import com.keyware.utils.IdGenerator; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.lang3.StringUtils; |
||||
import org.apache.solr.common.SolrDocument; |
||||
import org.apache.solr.common.SolrDocumentList; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.data.mongodb.core.query.Update; |
||||
|
||||
import java.io.FileInputStream; |
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.util.*; |
||||
import java.util.concurrent.CountDownLatch; |
||||
import java.util.stream.Collectors; |
||||
import java.util.stream.Stream; |
||||
|
||||
import static org.springframework.data.mongodb.core.query.Criteria.where; |
||||
|
||||
/**
 * Function (method / code-block) level source provenance analysis task.
 * Must run only after the file-level analysis for the same file has completed.
 *
 * @author liuzongren
 * @version 1.0
 * @datetime 2024-07-25 16:19
 */
@Log4j2
public class FunctionAnalysisTask extends IAnalysisTask {

    private MongoTemplate mongoTemplate;
    private AnalysisTask analysisTask;
    // File under analysis (the file record of the artifact being tested).
    private FileDataMongoDto analysisFile;

    private SolrUtils solrUtils;

    private RedisUtil redisUtil;

    // Counted down exactly once (in run()'s finally / early-return paths) so the
    // coordinator can wait for all per-file tasks.
    private CountDownLatch countDownLatch;


    /**
     * @param analysisTask   owning analysis task (supplies the task id and open-rate threshold)
     * @param analysisFile   the file to analyze
     * @param mongoTemplate  MongoDB access for results and status updates
     * @param countDownLatch completion latch shared with the task coordinator
     */
    public FunctionAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
        this.mongoTemplate = mongoTemplate;
        this.analysisTask = analysisTask;
        this.analysisFile = analysisFile;
        this.countDownLatch = countDownLatch;
        // Utilities are resolved from the Spring context because tasks are
        // constructed manually rather than injected.
        this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
        this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
    }

    /**
     * Function / code-block level provenance analysis.
     * Flow: honor stop/pause flags from Redis, parse the file into functions,
     * query the Solr function-feature core by feature and source MD5s, analyze
     * any matches (or fall back to a whole-file MD5 lookup), then persist the
     * file's analysis status.
     */
    @Override
    public void run() {
        // Honor a stop/pause request recorded in Redis before doing any work.
        Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId()));
        if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) {
            log.info("任务已取消,fileName:{}", analysisFile.getName());
            countDownLatch.countDown();
            return;
        }
        // Path and display name of the file under analysis.
        String filePath = analysisFile.getFileUrl();
        String fileName = analysisFile.getName();

        try {

            // Solr feature core to query, keyed by file suffix.
            String featureCoreName = FixedValue.SUFFIX_SOLR_FILE.get(analysisFile.getSuffix());

            // sourceFileBase core (per suffix) used to resolve file version information.
            String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());

            // Function parser chosen by file name/suffix; null when the language is unsupported.
            Analysis analysis = AnalysisFactory.getAnalysis(filePath);
            // Parse the file only when both core names and the parser are available.
            if (!ObjUtil.hasEmpty(featureCoreName, sourceFileBaseCoreName, analysis)) {
                CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath));
                if (codeFile != null) {
                    List<Function> functionList = codeFile.getFunctionList();
                    if (CollectionUtil.isNotEmpty(functionList)) {
                        // Collect both the feature MD5s and the cut (source) MD5s of every function.
                        List<String> featureFunctionMd5List = functionList.stream().map(Function::getMd5).collect(Collectors.toList());
                        List<String> cutFunctionMd5List = functionList.stream().map(Function::getSourceMd5).collect(Collectors.toList());
                        Set<String> queryMd5List = Stream.concat(featureFunctionMd5List.stream(), cutFunctionMd5List.stream()).collect(Collectors.toSet());
                        String queryStr = "fun_hay:(" + StringUtils.join(queryMd5List, " OR ") + ")";
                        // log.info("检索函数特征,coreName:{} ,queryStr:{}", featureCoreName, queryStr);
                        SolrDocumentList matchOpenFiles = solrUtils.query(featureCoreName, queryStr, "sourceMd5,fun_hay");
                        // log.info("resp", sourceMd5);
                        // If the function features matched open-source files, compare
                        // similarity against each of them.
                        // NOTE(review): this checks != null, not non-empty — confirm
                        // solrUtils.query can actually return null rather than an empty list.
                        if (matchOpenFiles != null) {
                            // Analyze the matched open-source files.
                            doAnalysis(matchOpenFiles, sourceFileBaseCoreName, codeFile);
                        } else {
                            // The function feature corpus is sparse; fall back to matching
                            // the whole file's MD5 against the source-file base.
                            checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5());
                        }
                    }
                }
            } else {
                // No parser (or missing core names): fall back to the whole-file MD5 lookup.
                checkByOriginalFileMd5(sourceFileBaseCoreName, analysisFile.getMd5());
            }

            // Mark the file's function-level analysis as done and replace the record.
            analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode());
            mongoTemplate.update(FileDataMongoDto.class)
                    .matching(where("_id").is(analysisFile.getId()))
                    .replaceWith(analysisFile)
                    .findAndReplace();

            AnalysisLogUtil.insert(mongoTemplate, "【函数级分析】完成" + fileName);
            log.info("文件" + fileName + ":函数级分析完成");
        } catch (Exception e) {
            AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e);
            log.error("文件:" + fileName + "函数级别特征提取失败!", e);
            // Mark this file's analysis status as failed.
            mongoTemplate.update(FileDataMongoDto.class)
                    .matching(where("_id").is(analysisFile.getId()))
                    .apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode()))
                    .first();
        } finally {
            countDownLatch.countDown();
        }
    }


    /**
     * Compare function-level text similarity against the matched open-source files,
     * aggregate the file's overall feature similarity and open rate, and persist
     * one MatchOpenFileMongoDto result document.
     *
     * @param matchOpenFiles         open-source files matched via function-feature MD5s
     * @param sourceFileBaseCoreName Solr core used to resolve open-source file version ids
     * @param fileAnalysisRes        function parse result of the file under analysis
     * @throws Exception propagated to run(), which records the failure status
     */
    private void doAnalysis(SolrDocumentList matchOpenFiles, String sourceFileBaseCoreName, CodeFile fileAnalysisRes) throws Exception {

        // Group functions by feature MD5: getters/setters etc. can share a feature value.
        Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));

        // Total number of lines across all functions of this file.
        int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum();

        // Feature-function MD5s matched by any open-source file (filled by the callee).
        Set<String> matchFeatureFunctionMd5s = new HashSet();

        // Source line numbers matched by any open-source file (filled by the callee).
        Set<Integer> matchOpenLineRowsNum = new HashSet();

        // Per-open-source-file open rate and feature similarity.
        List<MatchOpenFile> matchOpenFilesRes = calculateSimilarityAndOpenRate(matchOpenFiles, fileAnalysisRes, sourceFileBaseCoreName, matchOpenLineRowsNum, matchFeatureFunctionMd5s);

        // Overall feature similarity: matched function lines / total function lines.
        int matchFunctionLineCount = 0;
        for (String matchFeatureFunctionMd5 : matchFeatureFunctionMd5s) {
            matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum();
        }

        BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

        // Overall open rate: matched source lines / total code lines of the file.
        BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

        // Task-level open-rate threshold.
        Integer openRateThreshold = analysisTask.getOpenRateThreshold();

        // Above the threshold, the file is flagged as open source.
        if (openRate.floatValue() > openRateThreshold) {
            analysisFile.setOpenType(true);
        }

        // Persist this file's open-source match information to MongoDB.
        MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
        matchOpenFileMongo.setId(IdGenerator.uuid32())
                .setFilePath(analysisFile.getFileUrl())
                .setFileName(analysisFile.getName())
                .setFeatureSimilarity(featureSimilarity.floatValue())
                .setOpenRate(openRate.floatValue())
                .setOpenType(analysisFile.getOpenType())
                .setMatchOpenFile(matchOpenFilesRes);

        mongoTemplate.save(matchOpenFileMongo);
    }


    /**
     * Compute, per matched open-source file, the feature similarity and open rate
     * of the file under analysis.
     *
     * @param matchOpenFiles           open-source files matched by MD5
     * @param fileAnalysisRes          function parse result of the file under analysis
     * @param sourceFileBaseCoreName   Solr core for resolving version ids by source MD5
     * @param matchLineRowsNum         out-param: union of matched source line numbers
     * @param matchFeatureFunctionMd5s out-param: union of matched feature-function MD5s
     * @return per-open-source-file match results
     */
    private List<MatchOpenFile> calculateSimilarityAndOpenRate(SolrDocumentList matchOpenFiles, CodeFile fileAnalysisRes, String sourceFileBaseCoreName, Set<Integer> matchLineRowsNum, Set<String> matchFeatureFunctionMd5s) {

        // Matched open-source file result list.
        List<MatchOpenFile> matchOpenFilesRes = new ArrayList<>();

        // Group functions by feature MD5: getters/setters etc. can share a feature value.
        Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5));

        // First resolve version ids and paths of the open-source files by their MD5s.
        Set<String> openSourceFileMd5s = matchOpenFiles.stream().map(doc -> (String) doc.get("sourceMd5")).collect(Collectors.toSet());
        Map<String, SolrDocument> md5VersionInfoMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(sourceFileBaseCoreName, openSourceFileMd5s);

        // Then resolve the version details for those version ids.
        // TODO resolving one version hits two Solr cores plus versionTree — optimize later.
        Set<String> openSourceFileVersionIds = md5VersionInfoMap.values().stream().map(doc -> (String) doc.get("versionId")).collect(Collectors.toSet());
        List<VersionTree> versionTrees = solrUtils.queryBatchVersionInfoByVersionIds(openSourceFileVersionIds);
        Map<String, VersionTree> versionIdVersionInfoMap = versionTrees.stream().collect(Collectors.toMap(VersionTree::getVersionId, java.util.function.Function.identity()));


        // Total number of function lines in the file under analysis.
        BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum());

        for (SolrDocument openSourceFile : matchOpenFiles) {

            // MD5 of the open-source file.
            String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString();

            // Function features of the open-source file, parsed from its Solr document.
            List<Function> openFileFunctionList = getOpenFileFunctionList(openSourceFile);

            // Source-content core shard, selected by the first character of the MD5.
            String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO;

            // Full text of the open-source file.
            // NOTE(review): queryOne's result is dereferenced below without a null
            // check — confirm the content core always has a row for this MD5.
            SolrDocument openSourceContent = solrUtils.queryOne(openSourceCodeCoreIndex, "sourceFileMd5:" + openSourceFileMd5, "sourceContent");

            // Matched feature-function line count for this open-source file.
            int currentFileMatchFeatureLineCount = 0;

            // Feature-function MD5s already credited for this open-source file.
            Set<String> currentFileMatchFeatureFunctionMd5 = new HashSet();

            // Walk every feature MD5 of the file under analysis.
            for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) {
                List<Function> currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5);
                // Compare against the open-source file's function features.
                for (Function openFunction : openFileFunctionList) {
                    if (funFeatureMd5.equals(openFunction.getMd5())) {
                        // Credit each feature MD5 at most once per open-source file so
                        // duplicates don't inflate the similarity; all functions sharing
                        // the feature are counted together.
                        if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) {
                            currentFileMatchFeatureFunctionMd5.add(funFeatureMd5);
                            matchFeatureFunctionMd5s.add(funFeatureMd5);
                            currentFileMatchFeatureLineCount += currentFueatureFunctionList.stream().mapToInt(Function::getCodeRowNum).sum();
                        }
                    }
                }
            }

            // Open rate of this file against the open-source file's text, plus the
            // matched row numbers.
            Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString());
            // Accumulate matched line numbers for the overall open-rate statistic.
            matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue());

            // Feature similarity against this open-source file.
            BigDecimal featureSimilarity = new BigDecimal(currentFileMatchFeatureLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);

            SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5);
            VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId"));
            // Assemble the open-source project info for this match.
            MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
            matchOpenFileInfo.setPId(versionInfo.getProId())
                    .setPName(versionInfo.getProName())
                    .setSourceUrl((String) openEntries.get("fullPath"))
                    .setFeatureSimilarity(featureSimilarity.floatValue())
                    .setOpenRate(openRateAndSaveRowNum.getKey())
                    .setVersion(versionInfo.getVersionName())
                    .setLicenseType(versionInfo.getLicenseType())
                    .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());
            matchOpenFilesRes.add(matchOpenFileInfo);
        }
        return matchOpenFilesRes;
    }


    /**
     * Fallback when the function feature corpus has no hit: look the whole file's
     * MD5 up in the source-file base and, if present, record a 100% match.
     *
     * @param versionIdCoreName Solr core used to resolve the version id
     * @param originalFileMd5   MD5 of the file under analysis
     */
    private void checkByOriginalFileMd5(String versionIdCoreName, String originalFileMd5) {

        // Is this exact file present in the open-source corpus?
        SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + originalFileMd5, "versionId,fullPath,sourceFileMd5");

        if (versionIdAndPath != null) {
            // Resolve the version details for the matched file.
            VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId"));
            if (versionInfo != null) {
                // Exact MD5 match: similarity and open rate are both 100%.
                MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
                matchOpenFileInfo.setPId(versionInfo.getProId())
                        .setPName(versionInfo.getProName())
                        .setSourceUrl(versionInfo.getDownUrl())
                        .setFeatureSimilarity(100.00f)
                        .setOpenRate(100.00f)
                        .setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode());

                // Persist this file's open-source match information.
                MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
                matchOpenFileMongo.setId(IdGenerator.uuid32())
                        .setFilePath(analysisFile.getFileUrl())
                        .setFileName(analysisFile.getName())
                        .setOpenRate(100.00f)
                        .setOpenType(analysisFile.getOpenType())
                        .setMatchOpenFile(Arrays.asList(matchOpenFileInfo));

                mongoTemplate.save(matchOpenFileMongo);
            }
        }
    }


    /**
     * Parse the function feature list out of a matched open-source file's Solr
     * document ("fun_hay" field, JSON with escaping artifacts to strip).
     *
     * @param matchOpenFile Solr document of the matched open-source file
     * @return parsed functions; empty list when the field cannot be parsed
     */
    private List<Function> getOpenFileFunctionList(SolrDocument matchOpenFile) {
        try {
            // The stored JSON is escaped; strip backslashes and quote-brace artifacts
            // before parsing.
            String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString();
            lineFeatureMd5s = lineFeatureMd5s.replace("\\", "")
                    .replace("\"{", "{")
                    .replace("}\"", "}");
            return JSONArray.parseArray(lineFeatureMd5s, Function.class);
        } catch (Exception e) {
            log.error("解析文件特征值失败", e);
        }
        return new ArrayList<Function>();
    }

    /**
     * Insert feature values into MongoDB in batches.
     *
     * @param features         feature records to store
     * @param lineDataMongoDto carrier document; its id is regenerated per batch
     * @deprecated kept for reference; no longer part of the analysis flow
     */
    @Deprecated
    private void insertFeatureValue(List<LineModel> features, LineDataMongoDto lineDataMongoDto) {
        List<LineModel> batchInsertList = new ArrayList<>();
        if (CollectionUtil.isNotEmpty(features)) {
            // Insert in batches of batchInsertStpe records so a large parse result
            // cannot exceed MongoDB's per-document size limit.
            int batchInsertStpe = 10;
            int total = 0;
            for (int i = 0; i < features.size(); i++) {
                LineModel lineModel = features.get(i);
                if (total != batchInsertStpe) {
                    batchInsertList.add(lineModel);
                    total++;
                }
                // Flush the final partial batch on the last element.
                if (i == features.size() - 1 && total != batchInsertStpe) {
                    total = 0;
                    lineDataMongoDto.setId(IdGenerator.uuid32())
                            .setLineModels(batchInsertList);
                    mongoTemplate.insert(lineDataMongoDto);
                }
                // Flush a full batch and start a new one.
                if (total == batchInsertStpe) {
                    total = 0;
                    lineDataMongoDto.setId(IdGenerator.uuid32())
                            .setLineModels(batchInsertList);
                    mongoTemplate.insert(lineDataMongoDto);
                    batchInsertList.clear();
                }
            }
        } else {
            // No features: still insert the carrier document with a fresh id.
            lineDataMongoDto.setId(IdGenerator.uuid32());
            mongoTemplate.insert(lineDataMongoDto);
        }
    }

}
@ -0,0 +1,10 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
/**
 * Abstract base type for per-file analysis tasks; concrete subclasses implement
 * {@link Runnable#run()} with one analysis level (file / line / function).
 *
 * @author liuzongren
 * @date 2024/7/31
 */
public abstract class IAnalysisTask implements Runnable{

}
@ -0,0 +1,298 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
|
||||
import cn.hutool.core.collection.CollectionUtil; |
||||
import com.keyware.composeanalysis.constant.FixedValue; |
||||
import com.keyware.composeanalysis.constant.FunctionAndAnalysisAssemblyConst; |
||||
import com.keyware.composeanalysis.constant.RedisConst; |
||||
import com.keyware.composeanalysis.constant.SolrDBConst; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.LineDataMongoDto; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
||||
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import com.keyware.composeanalysis.util.AnalysisLogUtil; |
||||
import com.keyware.composeanalysis.util.RedisUtil; |
||||
import com.keyware.composeanalysis.util.SolrUtils; |
||||
import com.keyware.composeanalysis.util.SpringContextUtils; |
||||
import com.keyware.keyswan.anaysis.Analysis; |
||||
import com.keyware.keyswan.anaysis.AnalysisFactory; |
||||
import com.keyware.keyswan.common.CodeFile; |
||||
import com.keyware.utils.IdGenerator; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.lang3.StringUtils; |
||||
import org.apache.solr.common.SolrDocument; |
||||
import org.apache.solr.common.SolrDocumentList; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.data.mongodb.core.query.Update; |
||||
|
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.util.*; |
||||
import java.util.concurrent.CountDownLatch; |
||||
|
||||
import static org.springframework.data.mongodb.core.query.Criteria.where; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @ClassName LineAnalysisTask |
||||
* @description: 行级别 特征提取定时任务 |
||||
* @datetime 2024年 07月 25日 16:19 |
||||
* @version: 1.0 |
||||
*/ |
||||
|
||||
@Log4j2 |
||||
public class LineAnalysisTask extends IAnalysisTask { |
||||
|
||||
    private MongoTemplate mongoTemplate;
    private AnalysisTask analysisTask;
    // File under analysis (the file record of the artifact being tested).
    private FileDataMongoDto analysisFile;

    private SolrUtils solrUtils;

    private RedisUtil redisUtil;

    // Counted down exactly once when this task finishes (success, failure, or cancel).
    private CountDownLatch countDownLatch;

    /**
     * @param analysisTask   owning analysis task (supplies the task id and thresholds)
     * @param analysisFile   the file to analyze
     * @param mongoTemplate  MongoDB access for results and status updates
     * @param countDownLatch completion latch shared with the task coordinator
     */
    public LineAnalysisTask(AnalysisTask analysisTask, FileDataMongoDto analysisFile, MongoTemplate mongoTemplate, CountDownLatch countDownLatch) {
        this.mongoTemplate = mongoTemplate;
        this.analysisTask = analysisTask;
        this.analysisFile = analysisFile;
        this.countDownLatch = countDownLatch;
        // Utilities are resolved from the Spring context because tasks are
        // constructed manually rather than injected.
        this.solrUtils = SpringContextUtils.getBean(SolrUtils.class);
        this.redisUtil = SpringContextUtils.getBean(RedisUtil.class);
    }
||||
|
||||
/** |
||||
* 行级别 源代码溯源 |
||||
* 当前任务 需要在 文件级分析完成后 进行 |
||||
*/ |
||||
|
||||
@Override |
||||
public void run() { |
||||
//执行任务前,判断一下任务执行的状态
|
||||
Object status = redisUtil.get(String.format(RedisConst.TASK_RUNNING_STATUS_KEY_PREFIX, analysisTask.getId())); |
||||
if (status != null && (status.equals(AnalysisStatusEnum.STOP_ANALYSIS.getCode()) || status.equals(AnalysisStatusEnum.PAUSE_ANALYSIS.getCode()))) { |
||||
log.info("任务已取消,fileName:{}", analysisFile.getName()); |
||||
countDownLatch.countDown(); |
||||
return; |
||||
} |
||||
|
||||
//获取文件地址
|
||||
String filePath = analysisFile.getFileUrl(); |
||||
//获取文件名称
|
||||
String fileName = analysisFile.getName(); |
||||
|
||||
AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】正在提取" + fileName); |
||||
try { |
||||
LineDataMongoDto lineDataMongoDto = new LineDataMongoDto(); |
||||
lineDataMongoDto.setFileId(analysisFile.getId()) |
||||
.setStatus(0) |
||||
.setIsSelect(false); |
||||
Analysis analysis = AnalysisFactory.getAnalysis(filePath); |
||||
CodeFile codeFile = null; |
||||
|
||||
//获取文件行级特征md5
|
||||
codeFile = analysis.analysisFile(filePath, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT, FunctionAndAnalysisAssemblyConst.LINE_EXTRACT); |
||||
//每一行原内容MD5值集合
|
||||
// String cutFileLineMd5 = codeFile.getCutFileLineMd5();
|
||||
//每一行特征内容MD5值集合
|
||||
String traitFileLineMd5 = codeFile.getTraitFileLineMd5(); |
||||
|
||||
String[] featureMd5Arr = {}; |
||||
if (StringUtils.isNotBlank(traitFileLineMd5)) { |
||||
featureMd5Arr = traitFileLineMd5.split(","); |
||||
} |
||||
List<String> lineFeatures = Arrays.asList(featureMd5Arr); |
||||
|
||||
//从solr中获取特征相似的 文件
|
||||
SolrDocumentList featureSimilarityFromSolr = getFeatureSimilarityFromSolr(lineFeatures); |
||||
|
||||
//计算文件的开源率
|
||||
calculateOpenRate(featureSimilarityFromSolr, lineFeatures); |
||||
|
||||
//更新文件表的分析状态为3 行级特征以分析完毕
|
||||
analysisFile.setFileAnalysisStatus(FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()); |
||||
mongoTemplate.update(FileDataMongoDto.class) |
||||
.matching(where("_id").is(analysisFile.getId())) |
||||
.replaceWith(analysisFile) |
||||
.findAndReplace(); |
||||
|
||||
AnalysisLogUtil.insert(mongoTemplate, "【行级特征提取】提取完成" + fileName); |
||||
log.info("文件" + fileName + ":行级分析完成"); |
||||
} catch (Exception e) { |
||||
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【行级特征提取】提取失败" + fileName, e); |
||||
log.error("文件:" + fileName + "行级别特征提取失败!", e); |
||||
//修改当前文件分析状态未失败
|
||||
mongoTemplate.update(FileDataMongoDto.class) |
||||
.matching(where("_id").is(analysisFile.getId())) |
||||
.apply(new Update().set("fileAnalysisStatus", FileAnalysisStatusEnum.FAILED_ANALYSIS.getCode())) |
||||
.first(); |
||||
} finally { |
||||
countDownLatch.countDown(); |
||||
} |
||||
} |
||||
|
||||
|
||||
    /**
     * Compute the open rate of the file under analysis against each matched
     * open-source file, flag the file as open source when the overall rate
     * reaches the task threshold, and persist one MatchOpenFileMongoDto.
     *
     * @param matcheOpenSourceFiles open-source files matched by line features
     * @param lineFeatures          line-feature MD5s of the file under analysis
     */
    private void calculateOpenRate(SolrDocumentList matcheOpenSourceFiles, List<String> lineFeatures) {

        if (CollectionUtil.isEmpty(matcheOpenSourceFiles)) {
            return;
        }

        // Version core to query, keyed by the file's suffix.
        String versionIdCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix());


        // Result document for this file's open-source match information.
        MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto();
        matchOpenFileMongo.setId(IdGenerator.uuid32())
                .setFilePath(analysisFile.getFileUrl())
                .setFileName(analysisFile.getName());

        // Per-open-source-file match results.
        List<MatchOpenFile> matchOpenFileInfoList = new ArrayList<>();

        // Union of matched line-feature MD5s, for the overall matched-line count.
        Set<String> matchingLineSet = new HashSet<>();

        // Total code line count of the file under analysis.
        BigDecimal totalCodeRowNum = new BigDecimal(analysisFile.getCodeRowNum());

        // Count matched lines between the file under analysis and each open-source file.
        for (SolrDocument matchFile : matcheOpenSourceFiles) {
            // Comma-separated line-feature MD5s of the open-source file.
            String lineFeatureMd5s = (String) matchFile.get("tz_line_hay");
            List<String> matchedLineFeatures = Arrays.asList(lineFeatureMd5s.split(","));

            // Matched line count for this open-source file.
            // NOTE(review): duplicate feature MD5s in either list are counted once
            // per pairing here — confirm that is the intended line-count semantics.
            int currentFileMatchLineCount = 0;

            // Compare every line feature of the file against the open-source file's.
            for (String originalLineFeatureMd5 : lineFeatures) {
                for (String matchLineFeatureMd5 : matchedLineFeatures) {
                    if (originalLineFeatureMd5.equals(matchLineFeatureMd5)) {
                        currentFileMatchLineCount++;
                        matchingLineSet.add(originalLineFeatureMd5);
                    }
                }
            }

            // Resolve the open-source file's version id and path by its MD5.
            // NOTE(review): queryOne's result is dereferenced below without a null
            // check — confirm the version core always has a row for this MD5.
            SolrDocument versionIdAndPath = solrUtils.queryOne(versionIdCoreName, "sourceFileMd5:" + matchFile.get("sourceMd5"), "versionId,fullPath,sourceFileMd5");

            // Resolve the version details for that version id.
            // TODO resolving one version hits two Solr cores plus versionTree — optimize later.
            VersionTree versionInfo = solrUtils.queryVersionInfoByVersionId((String) versionIdAndPath.get("versionId"));

            // Open rate against this open-source file.
            BigDecimal openRate = new BigDecimal(currentFileMatchLineCount).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));

            // Assemble the open-source project info for this match.
            MatchOpenFile matchOpenFileInfo = new MatchOpenFile();
            matchOpenFileInfo.setPId(versionInfo.getProId())
                    .setPName(versionInfo.getProName())
                    .setSourceUrl(versionInfo.getDownUrl())
                    .setOpenRate(openRate.floatValue())
                    .setVersion(versionInfo.getVersionName())
                    .setLicenseType(versionInfo.getLicenseType())
                    .setAnalyzeType(AnalysisLevelEnum.LINE_LEVEL.getCode());
            matchOpenFileInfoList.add(matchOpenFileInfo);
        }

        // Overall open rate: distinct matched line features / total code lines.
        BigDecimal openRate = new BigDecimal(matchingLineSet.size()).divide(totalCodeRowNum, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100));

        // Task-level open-rate threshold.
        Integer openRateThreshold = analysisTask.getOpenRateThreshold();


        // At or above the threshold, the file is flagged as open source.
        if (openRate.compareTo(new BigDecimal(openRateThreshold)) >= 0) {
            analysisFile.setOpenType(true);
        }

        // Persist this file's open-source match information.
        matchOpenFileMongo.setOpenType(analysisFile.getOpenType())
                .setMatchOpenFile(matchOpenFileInfoList);
        mongoTemplate.save(matchOpenFileMongo);

    }
||||
|
||||
|
||||
/**
 * Inserts line-feature MD5 values into MongoDB in batches.
 *
 * @param features         comma-separated MD5 feature values; may be blank
 * @param lineDataMongoDto carrier document for the current analysis task; its id and
 *                         feature list are (re)assigned before every insert
 *                         todo evaluate whether this insert is still necessary
 */
@Deprecated
private void insertFeatureValue(String features, LineDataMongoDto lineDataMongoDto) {
    String[] featureMd5Arr = {};
    if (StringUtils.isNotBlank(features)) {
        featureMd5Arr = features.split(",");
    }
    List<String> lineFeatures = Arrays.asList(featureMd5Arr);
    List<String> batchInsertList = new ArrayList<>();
    if (CollectionUtil.isNotEmpty(lineFeatures)) {
        // Flush every batchInsertStpe entries so a single document never exceeds
        // MongoDB's size limits when the parsed feature set is very large.
        // NOTE(review): the original comment said "every 10", but the step is 5000.
        int batchInsertStpe = 5000;
        int total = 0;
        for (int i = 0; i < lineFeatures.size(); i++) {
            if (total != batchInsertStpe) {
                batchInsertList.add(lineFeatures.get(i));
                total++;
            }
            // Last element with a partially filled batch: flush whatever remains.
            if (i == lineFeatures.size() - 1 && total != batchInsertStpe) {
                total = 0;
                lineDataMongoDto.setId(IdGenerator.uuid32())
                        .setLineFeatueMd5s(batchInsertList);
                mongoTemplate.insert(lineDataMongoDto);
            }
            // Batch full: flush and reset the buffer for the next run.
            if (total == batchInsertStpe) {
                total = 0;
                lineDataMongoDto.setId(IdGenerator.uuid32())
                        .setLineFeatueMd5s(batchInsertList);
                mongoTemplate.insert(lineDataMongoDto);
                batchInsertList.clear();
            }
        }
    } else {
        // No features at all: still record the (empty) document for this task.
        lineDataMongoDto.setId(IdGenerator.uuid32());
        mongoTemplate.insert(lineDataMongoDto);
    }
}
||||
|
||||
|
||||
/** |
||||
* 根据 特征值 从特征库中检索 具有特征相似的 |
||||
* |
||||
* @param lineFeatureList 行特征信息 |
||||
* @return |
||||
*/ |
||||
private SolrDocumentList getFeatureSimilarityFromSolr(List<String> lineFeatureList) { |
||||
String solrCoreName = SolrDBConst.CORE_NAME_SOURCE_FILE_INFO_TEMP; |
||||
//拼接行特征查询条件
|
||||
String queryStr = "tz_line_hay:(" + StringUtils.join(lineFeatureList, " OR ") + ")"; |
||||
log.info("查询条件: solrCoreName:{},queryStr:{}", solrCoreName, queryStr); |
||||
SolrDocumentList result = solrUtils.query(solrCoreName, queryStr, "sourceMd5,tz_line_hay"); |
||||
log.info("查询结果: result:{}", result); |
||||
return result; |
||||
} |
||||
|
||||
|
||||
} |
@ -0,0 +1,378 @@ |
||||
package com.keyware.composeanalysis.task; |
||||
|
||||
import cn.hutool.core.collection.CollectionUtil; |
||||
import com.google.common.collect.Sets; |
||||
import com.keyware.composeanalysis.constant.FixedValue; |
||||
import com.keyware.composeanalysis.constant.MongoDBConst; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisLevelEnum; |
||||
import com.keyware.composeanalysis.constant.enums.AnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.constant.enums.FileAnalysisStatusEnum; |
||||
import com.keyware.composeanalysis.entity.AnalysisTask; |
||||
import com.keyware.composeanalysis.mongo.*; |
||||
import com.keyware.composeanalysis.service.impl.AnalysisTaskServiceImpl; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import com.keyware.composeanalysis.solr.VersionTreeNode; |
||||
import com.keyware.composeanalysis.util.AnalysisLogUtil; |
||||
import com.keyware.composeanalysis.util.SolrUtils; |
||||
import com.keyware.composeanalysis.util.SpringContextUtils; |
||||
import com.keyware.utils.IdGenerator; |
||||
import com.mongodb.client.MongoClient; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.collections.CollectionUtils; |
||||
import org.apache.commons.lang3.StringUtils; |
||||
import org.apache.solr.common.SolrDocument; |
||||
import org.springframework.core.task.TaskExecutor; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.data.mongodb.core.query.Query; |
||||
import org.springframework.data.mongodb.core.query.Update; |
||||
|
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.util.*; |
||||
import java.util.function.Function; |
||||
import java.util.stream.Collectors; |
||||
|
||||
|
||||
import static org.springframework.data.mongodb.core.query.Criteria.where; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/26 |
||||
* desc 项目级溯源分析任务,先将所有文件进行项目级匹配,匹配不中的文件在进行细致级别的匹配 |
||||
* 项目级匹配前,需要完成文件解压工作 |
||||
*/ |
||||
@Log4j2 |
||||
public class PorjectAnalysisTask { |
||||
private MongoTemplate mongoTemplate; |
||||
private MongoTemplate keyswanDBTemplate; |
||||
private AnalysisTask analysisTask; |
||||
private AnalysisTaskServiceImpl analysisService; |
||||
private SolrUtils solrUtils; |
||||
private TaskExecutor taskExecutor; |
||||
|
||||
/**
 * Creates a project-level analysis task.
 *
 * @param mongoClient     MongoDB client from which both databases are derived
 * @param analysisTask    the analysis task being executed
 * @param solrUtils       Solr access helper
 * @param analysisService service used to persist task status updates
 */
public PorjectAnalysisTask(MongoClient mongoClient, AnalysisTask analysisTask, SolrUtils solrUtils, AnalysisTaskServiceImpl analysisService) {
    this.analysisService = analysisService;
    // Shared "keyswan" knowledge-base database.
    keyswanDBTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_KEYSWAN);
    // Per-task database, named after the task id.
    this.mongoTemplate = new MongoTemplate(mongoClient, MongoDBConst.DB_NAME_PREFIX + analysisTask.getId());
    this.analysisTask = analysisTask;
    this.solrUtils = solrUtils;
    this.taskExecutor = SpringContextUtils.getBean(TaskExecutor.class);
}
||||
|
||||
|
||||
public void doAnalysis() { |
||||
try { |
||||
long startTime = System.currentTimeMillis(); |
||||
//首先从versionbasedata库中匹配当前被测件
|
||||
Boolean matchedPrject = matchByProjectMd5(); |
||||
|
||||
//从versionbase 中整体匹配不到项目信息, 拿项目的所有文件去匹配 solr库的versionTree去检索
|
||||
if (!matchedPrject) { |
||||
List<FileDataMongoDto> unMatchedFiles = matchByAllFilesMd5(); |
||||
|
||||
//剩余没有匹配文件,用文件的md5去匹配solr库的versionTree
|
||||
if (CollectionUtils.isNotEmpty(unMatchedFiles)) { |
||||
matchByFileMd5s(unMatchedFiles); |
||||
} |
||||
} |
||||
//todo 如果整体耗时较长,將matchOpenFileInfo存储到数据库的逻辑修改成异步的
|
||||
log.info("项目级分析完成,用时:" + (System.currentTimeMillis() - startTime) / 1000 + "s"); |
||||
} catch (Exception e) { |
||||
AnalysisLogUtil.insert(mongoTemplate, "成分分析失败:" + e.getStackTrace()); |
||||
log.error("项目级分析失败,项目名称:" + analysisTask.getFileName(), e); |
||||
analysisTask.setAnalysisStatus(AnalysisStatusEnum.FAIL_ANALYSIS.getCode()); |
||||
analysisService.updateById(analysisTask); |
||||
} |
||||
} |
||||
|
||||
// Whole-project match: checks whether the entire artifact is a known open-source project.
private Boolean matchByProjectMd5() {
    // Look the artifact's MD5 up in the version base.
    Query versionBaseQuery = new Query(where("MD5").is(analysisTask.getMd5()));
    VersionbasedataMongoDto openSourceProject = keyswanDBTemplate.findOne(versionBaseQuery, VersionbasedataMongoDto.class);
    // On a hit, mark the task open-source and persist the match details.
    if (openSourceProject != null) {
        analysisTask.setOpenType(true);
        // Record the matched open-source project as this artifact's assembly info.
        ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto();
        projectAssembly.setId(IdGenerator.uuid32())
                .setFileCount(analysisTask.getFileCount())
                .setMatchFileCount(analysisTask.getFileCount())
                .setProjectId(openSourceProject.getProjectId())
                .setVersionId(openSourceProject.getVersionId())
                .setVersionName(openSourceProject.getVersionName())
                .setSemblance(100.00d)
                .setOpenSourceUrl(openSourceProject.getDownloadUrl());
        // Enrich with project name and license from the project base.
        Query projectBaseQuery = new Query(where("ID").is(openSourceProject.getProjectId()));
        ProjectBaseDataMongoDto projectbasedata = keyswanDBTemplate.findOne(projectBaseQuery, ProjectBaseDataMongoDto.class);
        if (projectbasedata != null) {
            projectAssembly.setProjectName(projectbasedata.getName());
            if (StringUtils.isNotEmpty(projectbasedata.getLicenseType())) {
                projectAssembly.setLicenseType(Arrays.asList(projectbasedata.getLicenseType()));
            }
        }
        // Persist the assembly record into the task database.
        mongoTemplate.insert(projectAssembly);

        analysisService.updateById(analysisTask);

        // Mark every regular file of the artifact as fully open and analysed.
        mongoTemplate.update(FileDataMongoDto.class)
                .matching(where("isDirectory").is(false))
                .apply(new Update().set("openType", true)
                        .set("openRate", 100.00d)
                        .set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode()))
                .all();

        // Save per-file open-source details from the matched version tree.
        VersionTree openProjectList = solrUtils.queryVersionTreeByVersionId(openSourceProject.getVersionId());
        Query fileQuery = new Query(where("isDirectory").is(false));
        List<FileDataMongoDto> fileDataMongoDtos = mongoTemplate.find(fileQuery, FileDataMongoDto.class);
        saveProjectOpenInfo(openProjectList, fileDataMongoDtos);
        return true;
    }
    return false;
}
||||
|
||||
//通过文件的md5 去匹配开源项目
|
||||
private List<FileDataMongoDto> matchByAllFilesMd5() { |
||||
//检索当前项目的所有文件
|
||||
Query query = new Query(where("isDirectory").is(false)); |
||||
List<FileDataMongoDto> projectFiles = mongoTemplate.query(FileDataMongoDto.class).matching(query).all(); |
||||
|
||||
//分多次拿所有文件匹配solr库
|
||||
Set<String> matchedFileMd5Set = multipleMatchByAllFilesMd5(projectFiles); |
||||
|
||||
//统计未匹配的文件
|
||||
List<FileDataMongoDto> unMatchedFiles = projectFiles.stream().filter(file -> !matchedFileMd5Set.contains(file.getMd5())).collect(Collectors.toList()); |
||||
return unMatchedFiles; |
||||
} |
||||
|
||||
// Per-file matching: look each file's MD5 up in the feature cores.
private void matchByFileMd5s(List<FileDataMongoDto> unMatchedFiles) {

    // Group the files by suffix so each group can hit its own Solr core.
    Map<String, List<FileDataMongoDto>> allSuffixFiles = unMatchedFiles.stream().filter(file -> StringUtils.isNotEmpty(file.getSuffix())).collect(Collectors.groupingBy(FileDataMongoDto::getSuffix));

    // Files whose language has no dedicated feature core.
    List<FileDataMongoDto> otherLanguageFiles = new ArrayList<>();

    // Handle the 32 mainstream languages, each with its own core.
    allSuffixFiles.forEach((suffix, data) -> {
        // Resolve the feature-core name from the file suffix.
        if (FixedValue.SUFFIX_SOLR_VERSION.containsKey(suffix)) {
            String currentCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(suffix);
            // Look the version id up in *_SourceFileBase by source MD5.
            Set<String> fileMd5s = data.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
            Map<String, SolrDocument> md5VersionObjMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(currentCoreName, fileMd5s);
            if (CollectionUtil.isEmpty(md5VersionObjMap)) {
                return;
            }
            // Persist the matches.
            saveMatchOpenFileInfo(md5VersionObjMap, data);
        } else {
            // No dedicated core for this language: fall back to the default core.
            otherLanguageFiles.addAll(data);
        }
    });

    // Files without a suffix are treated like the non-mainstream languages.
    List<FileDataMongoDto> noSuffixFiles = unMatchedFiles.stream().parallel().filter(file -> StringUtils.isEmpty(file.getSuffix())).collect(Collectors.toList());
    otherLanguageFiles.addAll(noSuffixFiles);

    if (CollectionUtils.isNotEmpty(otherLanguageFiles)) {
        // Non-mainstream languages carry 2 kinds of MD5; the character-stream MD5
        // match is skipped for now because it is mostly identical anyway.
        Set<String> fileMd5s = otherLanguageFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet());
        Map<String, SolrDocument> md5VersionIdMap = solrUtils.batchQueryVersionIdFromSourceFileBaseBySourceMd5(MongoDBConst.TABLE_NAME_SOURCE_FILE_BASE, fileMd5s);
        if (md5VersionIdMap == null || md5VersionIdMap.isEmpty()) {
            // No hit: mark these files done — these languages cannot be parsed,
            // so an MD5 miss means there is nothing further to try.
            updateFileAnalysisStatus(fileMd5s);
            return;
        }
        saveMatchOpenFileInfo(md5VersionIdMap, otherLanguageFiles);
        // Mark the remaining (unmatched) files as done as well.
        updateFileAnalysisStatus(Sets.difference(fileMd5s, md5VersionIdMap.keySet()));
    }

}
||||
|
||||
|
||||
// After a match, enrich the hit with version details and persist it to Mongo.
private void saveMatchOpenFileInfo(Map<String, SolrDocument> md5VersionIdMap, List<FileDataMongoDto> originalFiles) {
    List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
    // Resolve full version details for every matched version id.
    //todo if this becomes a bottleneck, enrich the version info asynchronously
    Set<String> versionIds = md5VersionIdMap.values().stream().map(doc->(String)doc.get("versionId")).collect(Collectors.toSet());
    List<VersionTree> versionInfos = solrUtils.queryBatchVersionInfoByVersionIds(versionIds);
    Map<String, VersionTree> versionTreeMap = versionInfos.stream().collect(Collectors.toMap(VersionTree::getVersionId, Function.identity(), (key1, key2) -> key1));
    Map<String, FileDataMongoDto> fileMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
    md5VersionIdMap.forEach((fileMd5, versionObj) -> {
        String versionId = (String) versionObj.get("versionId");
        VersionTree versionInfo = versionTreeMap.get(versionId);
        // Version id with no versionTree entry: log and skip this file.
        if (versionInfo == null){
            log.error("根据versionId,未在versionTree中找到版本信息,fileMd5:{},versionId:{}",fileMd5, versionId);
            return;
        }
        FileDataMongoDto fileDataMongoDto = fileMd5ObjMap.get(fileMd5);
        MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, fileDataMongoDto, versionObj.get("fullPath").toString());
        batchInsertCache.add(matchOpenFile);
    });

    if (CollectionUtils.isNotEmpty(batchInsertCache)) {
        mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
        // Mark the matched files as analysed.
        updateFileAnalysisStatus(md5VersionIdMap.keySet());
    }
}
||||
|
||||
|
||||
// After a project-level match, persist the open-source details of every matched file.
private void saveProjectOpenInfo(VersionTree versionInfo, List<FileDataMongoDto> originalFiles) {
    Map<String, FileDataMongoDto> originalMd5ObjMap = originalFiles.stream().collect(Collectors.toMap(FileDataMongoDto::getMd5, Function.identity(), (key1, key2) -> key1));
    Set<String> matchedMd5s = new HashSet<>();
    List<MatchOpenFileMongoDto> batchInsertCache = new ArrayList<>();
    List<VersionTreeNode> fileInfos = versionInfo.getDirTree();

    fileInfos.forEach(versionTreeNodeObj->{
        String openFileMd5 = versionTreeNodeObj.getSourceFileMd5();
        // Does this open-source file also exist in the artifact under test?
        if (originalMd5ObjMap.keySet().contains(openFileMd5)) {
            // Persist each matched file only once.
            if (!matchedMd5s.contains(openFileMd5)) {
                MatchOpenFileMongoDto matchOpenFile = getMatchOpenFile(versionInfo, originalMd5ObjMap.get(openFileMd5),versionTreeNodeObj.getFullPath());
                batchInsertCache.add(matchOpenFile);
                matchedMd5s.add(openFileMd5);
            }
        }
        // Flush in batches so a huge project neither exhausts memory
        // nor exceeds MongoDB's insert limits.
        if (batchInsertCache.size() >= 1000) {
            mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
            batchInsertCache.clear();
        }
    });

    if (batchInsertCache.size() != 0) {
        mongoTemplate.insert(batchInsertCache, MatchOpenFileMongoDto.class);
    }

    // Mark the matched files as analysed.
    updateFileAnalysisStatus(matchedMd5s);
}
||||
|
||||
//获取匹配到的开源文件信息
|
||||
private MatchOpenFileMongoDto getMatchOpenFile(VersionTree versionInfo, FileDataMongoDto originalFile,String openFilePath) { |
||||
//设置匹配文件的信息
|
||||
MatchOpenFile matchOpenFile = new MatchOpenFile(); |
||||
matchOpenFile.setId(IdGenerator.uuid32()) |
||||
.setVersionId(versionInfo.getVersionId()) |
||||
.setSourceFilePath(openFilePath) |
||||
.setSourceUrl(versionInfo.getDownUrl()) |
||||
.setPId(versionInfo.getProId()) |
||||
.setPName(versionInfo.getProName()) |
||||
.setLicenseType(versionInfo.getLicenseType()) |
||||
.setAnalyzeType(AnalysisLevelEnum.FILE_LEVEL.getCode()) |
||||
.setVersion(versionInfo.getVersionName()) |
||||
.setFeatureSimilarity(100.00f) |
||||
.setOpenRate(100.00f); |
||||
|
||||
//创建当前文件与开源代码的匹配信息
|
||||
MatchOpenFileMongoDto matchOpenFileInfo = new MatchOpenFileMongoDto(); |
||||
matchOpenFileInfo.setId(IdGenerator.uuid32()) |
||||
.setFileName(originalFile.getName()) |
||||
.setFilePath(originalFile.getFileUrl()) |
||||
.setOpenType(originalFile.getOpenType()) |
||||
.setFeatureSimilarity(100.00f) |
||||
.setOpenRate(100.00f) |
||||
.setMatchOpenFile(Arrays.asList(matchOpenFile)); |
||||
return matchOpenFileInfo; |
||||
} |
||||
|
||||
//匹配拿所有文件的md5去versionTree中,需要分多次匹配,单次匹配多个结果集的话,会导致solr响应长时间阻塞
|
||||
//多次匹配,每次匹配上一次未匹配种的文件
|
||||
//todo 这里需要设置一个阈值,一共匹配多少次,或者当相似度达到多少的时候,停止整体匹配
|
||||
//目前默认查询三次
|
||||
private Set<String> multipleMatchByAllFilesMd5(List<FileDataMongoDto> projectFiles) { |
||||
|
||||
//获取被测件所有文件的md5
|
||||
Set<String> projectFilesMd5 = projectFiles.stream().map(FileDataMongoDto::getMd5).collect(Collectors.toSet()); |
||||
|
||||
//匹配到的文件md5存入set中
|
||||
Set<String> matchedFileMd5Set = new HashSet<>(); |
||||
|
||||
//剩余未匹配中的文件md5
|
||||
Set<String> unMatchedFileMd5s = projectFilesMd5; |
||||
|
||||
//循环匹配5次,进行整体的文件匹配
|
||||
for (int i = 0; i < 5; i++){ |
||||
//检索versionTree库
|
||||
String queryStr = "dirTree:(" + StringUtils.join(unMatchedFileMd5s, " OR ") + ")"; |
||||
log.info("versionTree queryStr: " + queryStr); |
||||
long startTime = System.currentTimeMillis(); |
||||
VersionTree openProject = solrUtils.queryVersionTree(queryStr); |
||||
log.info("query versionTree cost:{}s", (System.currentTimeMillis() - startTime) / 1000); |
||||
//如果存在没有匹配到开源数据的情况,直接退出循环匹配
|
||||
if (openProject == null){ |
||||
break; |
||||
} |
||||
|
||||
//异步保存匹配的开源文件信息
|
||||
taskExecutor.execute(() -> saveProjectOpenInfo(openProject, projectFiles)); |
||||
|
||||
//获取开源项目的所有文件md5集合
|
||||
List<String> openFilesMd5 = openProject.getDirTree().stream().map(VersionTreeNode::getSourceFileMd5).collect(Collectors.toList()); |
||||
//获取被测件和开源项目相同的文件
|
||||
Set<String> matchedFiles = unMatchedFileMd5s.stream().filter(item -> openFilesMd5.contains(item)).collect(Collectors.toSet()); |
||||
|
||||
//保存已匹配的文件md5,后续需要统计整体的开源率
|
||||
matchedFileMd5Set.addAll(matchedFiles); |
||||
|
||||
//计算与当前项目的相似度
|
||||
BigDecimal semblance = new BigDecimal(matchedFiles.size()).divide(new BigDecimal(projectFilesMd5.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)); |
||||
|
||||
//当相似度小于30%,不保存项目级的信息
|
||||
if (semblance.compareTo(new BigDecimal(30)) < 0){ |
||||
break; |
||||
} |
||||
|
||||
ProjectAssemblyMongoDto projectAssembly = new ProjectAssemblyMongoDto(); |
||||
projectAssembly.setId(IdGenerator.uuid32()) |
||||
.setFileCount(openFilesMd5.size()) |
||||
.setMatchFileCount(matchedFiles.size()) |
||||
.setProjectId(openProject.getProId()) |
||||
.setProjectName(openProject.getProName()) |
||||
.setVersionName(openProject.getVersionName()) |
||||
.setOpenSourceUrl(openProject.getDownUrl()) |
||||
.setSemblance(semblance.doubleValue()); |
||||
mongoTemplate.insert(projectAssembly); |
||||
|
||||
//获取未匹配中的文件md5,更新下次匹配的md5集合
|
||||
unMatchedFileMd5s = Sets.difference(unMatchedFileMd5s, matchedFiles); |
||||
//如果没有剩余未匹配文件,退出整体匹配
|
||||
if (CollectionUtils.isEmpty(unMatchedFileMd5s) ) { |
||||
break; |
||||
} |
||||
} |
||||
return matchedFileMd5Set; |
||||
} |
||||
|
||||
//更新文件分析的状态
|
||||
private void updateFileAnalysisStatus(Set<String> fileMd5Set) { |
||||
mongoTemplate.update(FileDataMongoDto.class) |
||||
.matching(where("md5").in(fileMd5Set)) |
||||
.apply(new Update().set("openType", true) |
||||
.set("openRate", 100.00f) |
||||
.set("fileAnalysisStatus", FileAnalysisStatusEnum.ANALYSIS_DONE.getCode())) |
||||
.all(); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,33 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import cn.hutool.core.date.DateTime; |
||||
import com.keyware.composeanalysis.mongo.AnalysisLogMongoDto; |
||||
import org.springframework.data.mongodb.core.MongoTemplate; |
||||
import org.springframework.scheduling.annotation.Async; |
||||
|
||||
import java.io.PrintWriter; |
||||
import java.io.StringWriter; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/30 |
||||
* @description 分析日志工具 |
||||
*/ |
||||
public class AnalysisLogUtil { |
||||
|
||||
public static void insert(MongoTemplate mongoTemplate,String logInfo) { |
||||
mongoTemplate.insert(new AnalysisLogMongoDto().setLogInfo(logInfo).setCreateTime(new DateTime())); |
||||
} |
||||
|
||||
public static void insertErrorInfo(MongoTemplate mongoTemplate, String logInfo, Exception e) { |
||||
mongoTemplate.insert(new AnalysisLogMongoDto().setLogInfo(logInfo + getErrorMsg(e)).setCreateTime(new DateTime())); |
||||
} |
||||
|
||||
|
||||
private static String getErrorMsg(Exception e) { |
||||
StringWriter errors = new StringWriter(); |
||||
e.printStackTrace(new PrintWriter(errors)); |
||||
return errors.toString(); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,32 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import com.alibaba.fastjson.JSON; |
||||
import com.alibaba.fastjson.JSONArray; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import com.keyware.composeanalysis.solr.VersionTreeNode; |
||||
import org.apache.solr.common.SolrDocument; |
||||
|
||||
import java.util.List; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/8/7 |
||||
* @description dom 转 entity |
||||
*/ |
||||
public class BeanUtil { |
||||
|
||||
public static VersionTree domToVersionTree(SolrDocument dom) { |
||||
String dirTree = String.valueOf(dom.get("dirTree")); |
||||
dirTree = dirTree.replace("\\", ""); |
||||
dirTree = dirTree.replace("\"{", "{"); |
||||
dirTree = dirTree.replace("}\"", "}"); |
||||
dom.put("dirTree", null); |
||||
JSONArray treeArray = JSON.parseArray(dirTree); |
||||
List<VersionTreeNode> treeList = treeArray.toJavaList(VersionTreeNode.class); |
||||
String domObj = JSON.toJSONString(dom); |
||||
VersionTree versionTree = JSON.parseObject(domObj, VersionTree.class); |
||||
versionTree.setDirTree(treeList); |
||||
return versionTree; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,38 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import com.alibaba.fastjson.JSON; |
||||
import com.mongodb.BasicDBObject; |
||||
import org.bson.Document; |
||||
import org.bson.json.JsonWriterSettings; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/24 |
||||
* 类型转化工具类 |
||||
*/ |
||||
public class ConvertUtil { |
||||
|
||||
public <T> T documentToBean(BasicDBObject dbObject, Class<T> clzss) { |
||||
String realJson = dbObject.toJson(JsonWriterSettings.builder().build()); |
||||
T obj = JSON.parseObject(realJson, clzss); |
||||
return obj; |
||||
} |
||||
|
||||
public static <T> T documentToBean(Document document, Class<T> clzss) { |
||||
String realJson = document.toJson(JsonWriterSettings.builder().build()); |
||||
T obj = JSON.parseObject(realJson, clzss); |
||||
return obj; |
||||
} |
||||
|
||||
public static <T> BasicDBObject toDBObject(T object) { |
||||
String json = JSON.toJSONString(object); |
||||
BasicDBObject basicDBObject = BasicDBObject.parse(json); |
||||
return basicDBObject; |
||||
} |
||||
|
||||
public static <T> Document beanToDocument(T object) { |
||||
String json = JSON.toJSONString(object); |
||||
Document document = Document.parse(json); |
||||
return document; |
||||
} |
||||
} |
@ -0,0 +1,23 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import lombok.extern.log4j.Log4j2; |
||||
|
||||
import java.net.InetAddress; |
||||
import java.net.UnknownHostException; |
||||
|
||||
/** |
||||
* @author liuzongren |
||||
* @date 2024/7/30 |
||||
*/ |
||||
@Log4j2 |
||||
public class IpUtil { |
||||
|
||||
public static String getHostIp() { |
||||
try { |
||||
return InetAddress.getLocalHost().getHostAddress(); |
||||
} catch (UnknownHostException e) { |
||||
log.error(e.getMessage(), e); |
||||
} |
||||
return "127.0.0.1"; |
||||
} |
||||
} |
@ -0,0 +1,537 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
|
||||
import org.springframework.beans.factory.annotation.Autowired; |
||||
import org.springframework.beans.factory.annotation.Qualifier; |
||||
import org.springframework.data.redis.core.RedisTemplate; |
||||
import org.springframework.stereotype.Component; |
||||
|
||||
import java.util.List; |
||||
import java.util.Map; |
||||
import java.util.Set; |
||||
import java.util.concurrent.TimeUnit; |
||||
|
||||
/** |
||||
* liuzongren |
||||
* data 2024/04/02 |
||||
*/ |
||||
@Component |
||||
public class RedisUtil { |
||||
|
||||
|
||||
@Autowired |
||||
private RedisTemplate redisTemplate; |
||||
|
||||
/**
 * Sets an expiry (in seconds) on the given key.
 *
 * @param key  the key
 * @param time time-to-live in seconds
 * @return true if the expiry was set
 */
public boolean expire(String key, long time) {
    return this.redisTemplate.expire(key, time, TimeUnit.SECONDS);
}

/**
 * Returns the remaining time-to-live of the key, in seconds.
 *
 * @param key the key
 * @return remaining TTL in seconds
 */
public long getTime(String key) {
    return redisTemplate.getExpire(key, TimeUnit.SECONDS);
}

/**
 * Checks whether the key exists.
 * (The original javadoc wrongly said "get expire time" — copy-paste slip.)
 *
 * @param key the key
 * @return true if the key exists
 */
public boolean hasKey(String key) {
    return redisTemplate.hasKey(key);
}

/**
 * Removes the expiry from the key, making it persistent.
 *
 * @param key the key
 * @return true if the expiry was removed
 */
public boolean persist(String key) {
    return redisTemplate.boundValueOps(key).persist();
}
||||
|
||||
//- - - - - - - - - - - - - - - - - - - - - String operations - - - - - - - - - - - - - - - - - - - -

/**
 * Gets the value stored under a key.
 *
 * @param key the key; null returns null
 * @return the stored value, or null
 */
public Object get(String key) {
    return key == null ? null : redisTemplate.opsForValue().get(key);
}

/**
 * Stores a value under the key.
 *
 * @param key   the key
 * @param value the value
 */
public void set(String key, Object value) {
    redisTemplate.opsForValue().set(key, value);
}

/**
 * Stores a value under the key with a time-to-live.
 *
 * @param key   the key
 * @param value the value
 * @param time  TTL in seconds; values &lt;= 0 store without expiry
 */
public void set(String key, String value, long time) {
    if (time > 0) {
        redisTemplate.opsForValue().set(key, value, time, TimeUnit.SECONDS);
    } else {
        redisTemplate.opsForValue().set(key, value);
    }
}

// Deletes the key.
public void delKey(String key) {
    redisTemplate.delete(key);
}
||||
|
||||
/**
 * Batch-stores key/value pairs (existing keys are overwritten).
 *
 * @param keyAndValue the pairs to store
 */
public void batchSet(Map<String, String> keyAndValue) {
    redisTemplate.opsForValue().multiSet(keyAndValue);
}

/**
 * Batch-stores key/value pairs only if none of the keys exist;
 * if any single key of the map exists, nothing at all is stored.
 *
 * @param keyAndValue the pairs to store
 */
public void batchSetIfAbsent(Map<String, String> keyAndValue) {
    redisTemplate.opsForValue().multiSetIfAbsent(keyAndValue);
}

/**
 * Increments the long value of the key by the given amount.
 * Missing keys are created; a non-long value causes an error.
 *
 * @param key    the key
 * @param number the delta (may be negative)
 * @return the value after the increment
 */
public Long increment(String key, long number) {
    return redisTemplate.opsForValue().increment(key, number);
}

/**
 * Increments the numeric value of the key by the given amount.
 * Missing keys are created; a non-numeric value causes an error.
 *
 * @param key    the key
 * @param number the delta (may be negative)
 * @return the value after the increment
 */
public Double increment(String key, double number) {
    return redisTemplate.opsForValue().increment(key, number);
}
||||
|
||||
//- - - - - - - - - - - - - - - - - - - - - Set operations - - - - - - - - - - - - - - - - - - - -

/**
 * Adds a value to the set stored at the key.
 *
 * @param key   the set key
 * @param value the member to add
 */
public void sSet(String key, String value) {
    redisTemplate.opsForSet().add(key, value);
}

/**
 * Returns all members of the set stored at the key.
 *
 * @param key the set key
 * @return all members of the set
 */
public Set<Object> members(String key) {
    return redisTemplate.opsForSet().members(key);
}
||||
|
||||
/** |
||||
* 随机获取变量中指定个数的元素 |
||||
* |
||||
* @param key 键 |
||||
* @param count 值 |
||||
* @return |
||||
*/ |
||||
public void randomMembers(String key, long count) { |
||||
redisTemplate.opsForSet().randomMembers(key, count); |
||||
} |
||||
|
||||
/**
 * Returns one randomly chosen member of the set stored at the key.
 *
 * @param key the set key
 * @return a random member
 */
public Object randomMember(String key) {
    return redisTemplate.opsForSet().randomMember(key);
}
||||
|
||||
/** |
||||
* 弹出变量中的元素 |
||||
* |
||||
* @param key 键 |
||||
* @return |
||||
*/ |
||||
public Object pop(String key) { |
||||
return redisTemplate.opsForSet().pop("setValue"); |
||||
} |
||||
|
||||
/**
 * Returns the number of members in the set stored at the key.
 *
 * @param key the set key
 * @return the set's cardinality
 */
public long size(String key) {
    return redisTemplate.opsForSet().size(key);
}
||||
|
||||
/**
 * Checks whether the value is a member of the set stored at the key.
 *
 * @param key   the set key
 * @param value the candidate member
 * @return true if present, false otherwise
 */
public boolean sHasKey(String key, Object value) {
    return redisTemplate.opsForSet().isMember(key, value);
}

/**
 * Checks whether the element is a member of the set stored at the key.
 * (Delegates to the same operation as {@code sHasKey}; kept for compatibility.)
 *
 * @param key the set key
 * @param obj the candidate member
 * @return true if present, false otherwise
 */
public boolean isMember(String key, Object obj) {
    return redisTemplate.opsForSet().isMember(key, obj);
}

/**
 * Moves a member from one set to another.
 *
 * @param key     source set key
 * @param value   the member to move
 * @param destKey destination set key
 * @return true if the member was moved
 */
public boolean move(String key, String value, String destKey) {
    return redisTemplate.opsForSet().move(key, value, destKey);
}

/**
 * Removes the given members from the set stored at the key.
 *
 * @param key    the set key
 * @param values members to remove
 */
public void remove(String key, Object... values) {
    redisTemplate.opsForSet().remove(key, values);
}
||||
|
||||
/**
 * Returns the members of the set at {@code key} that are not in the set at {@code destKey}.
 * NOTE(review): the declared return type {@code Set<Set>} looks wrong — the elements
 * are plain members, so {@code Set<Object>} would be accurate; left unchanged to avoid
 * breaking existing callers.
 *
 * @param key     first set key
 * @param destKey second set key
 * @return the difference (key minus destKey)
 */
public Set<Set> difference(String key, String destKey) {
    return redisTemplate.opsForSet().difference(key, destKey);
}
||||
|
||||
|
||||
//- - - - - - - - - - - - - - - - - - - - - Hash operations - - - - - - - - - - - - - - - - - - - -

/**
 * Stores all entries of the map into the hash at the key.
 *
 * @param key the hash key
 * @param map field/value pairs to store
 */
public void add(String key, Map<String, String> map) {
    redisTemplate.opsForHash().putAll(key, map);
}

/**
 * Returns all field/value pairs of the hash at the key.
 *
 * @param key the hash key
 * @return every entry in the hash
 */
public Map<Object, Object> getHashEntries(String key) {
    return redisTemplate.opsForHash().entries(key);
}

/**
 * Checks whether the hash at {@code key} contains the field {@code hashKey}.
 *
 * @param key     the hash key
 * @param hashKey the field name
 * @return true if the field exists
 */
public boolean hashKey(String key, String hashKey) {
    return redisTemplate.opsForHash().hasKey(key, hashKey);
}
||||
|
||||
/** |
||||
* 获取指定key的值string |
||||
* |
||||
* @param key 键 |
||||
* @param key2 键 |
||||
* @return |
||||
*/ |
||||
public String getMapString(String key, String key2) { |
||||
return redisTemplate.opsForHash().get("map1", "key1").toString(); |
||||
} |
||||
|
||||
/** |
||||
* 获取指定的值Int |
||||
* |
||||
* @param key 键 |
||||
* @param key2 键 |
||||
* @return |
||||
*/ |
||||
public Integer getMapInt(String key, String key2) { |
||||
return (Integer) redisTemplate.opsForHash().get("map1", "key1"); |
||||
} |
||||
|
||||
/** |
||||
* 弹出元素并删除 |
||||
* |
||||
* @param key 键 |
||||
* @return |
||||
*/ |
||||
public String popValue(String key) { |
||||
return redisTemplate.opsForSet().pop(key).toString(); |
||||
} |
||||
|
||||
/** |
||||
* 删除指定 hash 的 HashKey |
||||
* |
||||
* @param key |
||||
* @param hashKeys |
||||
* @return 删除成功的 数量 |
||||
*/ |
||||
public Long delete(String key, String... hashKeys) { |
||||
return redisTemplate.opsForHash().delete(key, hashKeys); |
||||
} |
||||
|
||||
/** |
||||
* 给指定 hash 的 hashkey 做增减操作 |
||||
* |
||||
* @param key |
||||
* @param hashKey |
||||
* @param number |
||||
* @return |
||||
*/ |
||||
public Long increment(String key, String hashKey, long number) { |
||||
return redisTemplate.opsForHash().increment(key, hashKey, number); |
||||
} |
||||
|
||||
/** |
||||
* 给指定 hash 的 hashkey 做增减操作 |
||||
* |
||||
* @param key |
||||
* @param hashKey |
||||
* @param number |
||||
* @return |
||||
*/ |
||||
public Double increment(String key, String hashKey, Double number) { |
||||
return redisTemplate.opsForHash().increment(key, hashKey, number); |
||||
} |
||||
|
||||
/** |
||||
* 获取 key 下的 所有 hashkey 字段 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public Set<Object> hashKeys(String key) { |
||||
return redisTemplate.opsForHash().keys(key); |
||||
} |
||||
|
||||
/** |
||||
* 获取指定 hash 下面的 键值对 数量 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public Long hashSize(String key) { |
||||
return redisTemplate.opsForHash().size(key); |
||||
} |
||||
|
||||
//- - - - - - - - - - - - - - - - - - - - - list类型 - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
/** |
||||
* 在变量左边添加元素值 |
||||
* |
||||
* @param key |
||||
* @param value |
||||
* @return |
||||
*/ |
||||
public void leftPush(String key, Object value) { |
||||
redisTemplate.opsForList().leftPush(key, value); |
||||
} |
||||
|
||||
/** |
||||
* 获取集合指定位置的值。 |
||||
* |
||||
* @param key |
||||
* @param index |
||||
* @return |
||||
*/ |
||||
public Object index(String key, long index) { |
||||
return redisTemplate.opsForList().index("list", 1); |
||||
} |
||||
|
||||
/** |
||||
* 获取指定区间的值。 |
||||
* |
||||
* @param key |
||||
* @param start |
||||
* @param end |
||||
* @return |
||||
*/ |
||||
public List<Object> range(String key, long start, long end) { |
||||
return redisTemplate.opsForList().range(key, start, end); |
||||
} |
||||
|
||||
/** |
||||
* 把最后一个参数值放到指定集合的第一个出现中间参数的前面, |
||||
* 如果中间参数值存在的话。 |
||||
* |
||||
* @param key |
||||
* @param pivot |
||||
* @param value |
||||
* @return |
||||
*/ |
||||
public void leftPush(String key, String pivot, String value) { |
||||
redisTemplate.opsForList().leftPush(key, pivot, value); |
||||
} |
||||
|
||||
/** |
||||
* 向左边批量添加参数元素。 |
||||
* |
||||
* @param key |
||||
* @param values |
||||
* @return |
||||
*/ |
||||
public void leftPushAll(String key, String... values) { |
||||
// redisTemplate.opsForList().leftPushAll(key,"w","x","y");
|
||||
redisTemplate.opsForList().leftPushAll(key, values); |
||||
} |
||||
|
||||
/**
 * Appends a single value to the RIGHT end (tail) of the list at {@code key}.
 *
 * NOTE(review): despite being named {@code leftPushAll}, this delegates to
 * {@code rightPush} — the original Chinese javadoc ("add to the rightmost end
 * of the collection") matches the implementation, so the behavior is kept and
 * only the misleading name is flagged; renaming would break existing callers.
 *
 * @param key   redis key of the list
 * @param value element appended to the tail
 */
public void leftPushAll(String key, String value) {
    redisTemplate.opsForList().rightPush(key, value);
}
||||
|
||||
/** |
||||
* 向左边批量添加参数元素。 |
||||
* |
||||
* @param key |
||||
* @param values |
||||
* @return |
||||
*/ |
||||
public void rightPushAll(String key, String... values) { |
||||
//redisTemplate.opsForList().leftPushAll(key,"w","x","y");
|
||||
redisTemplate.opsForList().rightPushAll(key, values); |
||||
} |
||||
|
||||
/** |
||||
* 向已存在的集合中添加元素。 |
||||
* |
||||
* @param key |
||||
* @param value |
||||
* @return |
||||
*/ |
||||
public void rightPushIfPresent(String key, Object value) { |
||||
redisTemplate.opsForList().rightPushIfPresent(key, value); |
||||
} |
||||
|
||||
/** |
||||
* 向已存在的集合中添加元素。 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public long listLength(String key) { |
||||
return redisTemplate.opsForList().size(key); |
||||
} |
||||
|
||||
/** |
||||
* 移除集合中的左边第一个元素。 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public void leftPop(String key) { |
||||
redisTemplate.opsForList().leftPop(key); |
||||
} |
||||
|
||||
/** |
||||
* 移除集合中左边的元素在等待的时间里,如果超过等待的时间仍没有元素则退出。 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public void leftPop(String key, long timeout, TimeUnit unit) { |
||||
redisTemplate.opsForList().leftPop(key, timeout, unit); |
||||
} |
||||
|
||||
/** |
||||
* 移除集合中右边的元素。 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public void rightPop(String key) { |
||||
redisTemplate.opsForList().rightPop(key); |
||||
} |
||||
|
||||
/** |
||||
* 移除集合中右边的元素在等待的时间里,如果超过等待的时间仍没有元素则退出。 |
||||
* |
||||
* @param key |
||||
* @return |
||||
*/ |
||||
public void rightPop(String key, long timeout, TimeUnit unit) { |
||||
redisTemplate.opsForList().rightPop(key, timeout, unit); |
||||
} |
||||
} |
@ -0,0 +1,206 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import cn.hutool.core.lang.Pair; |
||||
import cn.hutool.core.util.ArrayUtil; |
||||
import cn.hutool.core.util.ByteUtil; |
||||
import cn.hutool.core.util.StrUtil; |
||||
import io.micrometer.common.util.StringUtils; |
||||
import org.apache.commons.collections.CollectionUtils; |
||||
|
||||
import java.math.BigDecimal; |
||||
import java.math.RoundingMode; |
||||
import java.util.ArrayList; |
||||
import java.util.Arrays; |
||||
import java.util.HashSet; |
||||
import java.util.List; |
||||
|
||||
public class SimilarityUtil { |
||||
|
||||
|
||||
|
||||
private SimilarityUtil() { |
||||
|
||||
} |
||||
|
||||
public static void main(String[] args) { |
||||
String s1=""; |
||||
String s2=""; |
||||
double similarity = getSimilarityMe(s1, s2); |
||||
System.out.println(similarity); |
||||
} |
||||
|
||||
/** |
||||
* 获得两个文件的相似度 |
||||
* @param sentence1 |
||||
* @param sentence2 |
||||
* @return |
||||
*/ |
||||
public static double getSimilarityMe(String sentence1, String sentence2) { |
||||
//被测件文件行
|
||||
List<String> sent1Words = getSplitWords(sentence1); |
||||
if (sentence1.length()==0){ |
||||
return 0.00; |
||||
} |
||||
//溯源到文件行
|
||||
HashSet<String> sent2Words = getSplitWords1(sentence2); |
||||
//匹配到的行数
|
||||
double count=0; |
||||
for (String sent1Word : sent1Words) { |
||||
if (sent2Words.contains(sent1Word)){ |
||||
count++; |
||||
} |
||||
} |
||||
return count/sent1Words.size(); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* 获取开源率和开源行号 |
||||
* @param analysisFile 被测件内容 |
||||
* @param openSourceFile 开源文件内容 |
||||
* @return |
||||
*/ |
||||
// public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) {
|
||||
// if (StrUtil.hasBlank(analysisFile,openSourceFile)){
|
||||
// return new Pair<>(0.00f,new HashSet<>());
|
||||
// }
|
||||
// //匹配到的行号
|
||||
// HashSet<Integer> matchedRowsNum = new HashSet<>();
|
||||
//
|
||||
// //被测件文件行
|
||||
// List<String> analysisFileLineInfo = getSplitWords(analysisFile);
|
||||
//
|
||||
// //溯源到文件行
|
||||
// HashSet<String> openSourceFileLineInfo = getSplitWords1(openSourceFile);
|
||||
//
|
||||
// for (int i = 0; i < analysisFileLineInfo.size(); i++) {
|
||||
// String sent1Word = analysisFileLineInfo.get(i);
|
||||
// if (openSourceFileLineInfo.contains(sent1Word)) {
|
||||
// matchedRowsNum.add(i);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// //计算开源率
|
||||
// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
|
||||
//
|
||||
// return new Pair<>(openRate.floatValue(), matchedRowsNum);
|
||||
// }
|
||||
|
||||
|
||||
// public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(byte[] analysisFile, byte[] openSourceFile) {
|
||||
// if (ArrayUtil.hasNull(analysisFile,openSourceFile)){
|
||||
// return new Pair<>(0.00f,new HashSet<>());
|
||||
// }
|
||||
// //匹配到的行号
|
||||
// HashSet<Integer> matchedRowsNum = new HashSet<>();
|
||||
//
|
||||
// //被测件文件行
|
||||
// List<String> analysisFileLineInfo = getSplitWords(new String(analysisFile));
|
||||
//
|
||||
// //溯源到文件行
|
||||
// HashSet<String> openSourceFileLineInfo = getSplitWords1(new String(openSourceFile));
|
||||
//
|
||||
// for (int i = 0; i < analysisFileLineInfo.size(); i++) {
|
||||
// String sent1Word = analysisFileLineInfo.get(i);
|
||||
// if (openSourceFileLineInfo.contains(sent1Word)) {
|
||||
// matchedRowsNum.add(i);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// //计算开源率
|
||||
// BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2);
|
||||
//
|
||||
// return new Pair<>(openRate.floatValue(), matchedRowsNum);
|
||||
// }
|
||||
|
||||
public static Pair<Float, HashSet<Integer>> getOpenRateAndSaveRowNum(String analysisFile, String openSourceFile) { |
||||
if (StrUtil.hasBlank(analysisFile,openSourceFile)){ |
||||
return new Pair<>(0.00f,new HashSet<>()); |
||||
} |
||||
//匹配到的行号
|
||||
HashSet<Integer> matchedRowsNum = new HashSet<>(); |
||||
|
||||
//被测件文件行
|
||||
List<String> analysisFileLineInfo = getSplitWords(analysisFile); |
||||
|
||||
//溯源到文件行
|
||||
HashSet<String> openSourceFileLineInfo = getSplitWords1(openSourceFile); |
||||
|
||||
for (int i = 0; i < analysisFileLineInfo.size(); i++) { |
||||
String sent1Word = analysisFileLineInfo.get(i); |
||||
if (openSourceFileLineInfo.contains(sent1Word)) { |
||||
matchedRowsNum.add(i); |
||||
} |
||||
} |
||||
|
||||
//计算开源率
|
||||
BigDecimal openRate = new BigDecimal(matchedRowsNum.size()).divide(new BigDecimal(analysisFileLineInfo.size()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
||||
|
||||
return new Pair<>(openRate.floatValue(), matchedRowsNum); |
||||
} |
||||
|
||||
|
||||
/** |
||||
* 获得两个文件的相似度,并将被匹配的行 |
||||
* @param matchLineInfos 被匹配的行信息 |
||||
* @param sentence2 开源文件内容 |
||||
* @return |
||||
*/ |
||||
public static double getSimilarityAndSaveRowNum(List<String> matchLineInfos, String sentence2,HashSet<Integer> matchRows) { |
||||
if (CollectionUtils.isEmpty(matchLineInfos)){ |
||||
return 0.00d; |
||||
} |
||||
//溯源到文件行
|
||||
HashSet<String> sent2Words = getSplitWords1(sentence2); |
||||
//匹配到的行数
|
||||
double count = 0d; |
||||
for (int i = 0; i < matchLineInfos.size(); i++) { |
||||
String lineContents = matchLineInfos.get(i); |
||||
if (sent2Words.contains(lineContents)) { |
||||
//保存匹配中的行序号
|
||||
matchRows.add(i); |
||||
count++; |
||||
} |
||||
} |
||||
return count / matchLineInfos.size(); |
||||
} |
||||
|
||||
|
||||
|
||||
public static List<String> getSplitWords(String sentence) { |
||||
List<String> lineList = new ArrayList<String>(); |
||||
if (StringUtils.isBlank(sentence)){ |
||||
return lineList; |
||||
} |
||||
sentence = sentence.replaceAll("\n\r", "\n").replaceAll("\r\n", "\n").replaceAll("\r", "\n"); |
||||
List<String> list = Arrays.asList(sentence.split("\n")); |
||||
for (String string : list) { |
||||
if (string != null && !"".equals(string.trim())) { |
||||
lineList.add(string.replaceAll(" ","")); |
||||
} |
||||
} |
||||
return lineList; |
||||
|
||||
// // 去除掉html标签
|
||||
//
|
||||
// sentence = Jsoup.parse(sentence.replace(" ","")).body().text();
|
||||
//
|
||||
//
|
||||
// // 标点符号会被单独分为一个Term,去除之
|
||||
//
|
||||
// return HanLP.segment(sentence).stream().map(a -> a.word).filter(s -> !"`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()——|{}【】‘;:”“'。,、? ".contains(s)).collect(Collectors.toList());
|
||||
|
||||
} |
||||
private static HashSet<String> getSplitWords1(String sentence) { |
||||
HashSet<String> set = new HashSet<>(); |
||||
sentence = sentence.replaceAll("\n\r", "\n").replaceAll("\r\n", "\n").replaceAll("\r", "\n"); |
||||
List<String> list = Arrays.asList(sentence.split("\n")); |
||||
for (String string : list) { |
||||
if (string != null && !"".equals(string.trim())) { |
||||
set.add(string.replaceAll(" ","")); |
||||
} |
||||
} |
||||
return set; |
||||
} |
||||
|
||||
} |
@ -0,0 +1,321 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import com.keyware.composeanalysis.constant.MongoDBConst; |
||||
import com.keyware.composeanalysis.solr.VersionTree; |
||||
import lombok.Data; |
||||
import lombok.extern.log4j.Log4j2; |
||||
import org.apache.commons.collections.CollectionUtils; |
||||
import org.apache.commons.lang3.StringUtils; |
||||
import org.apache.solr.client.solrj.SolrClient; |
||||
import org.apache.solr.client.solrj.SolrRequest; |
||||
import org.apache.solr.client.solrj.SolrServerException; |
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient; |
||||
import org.apache.solr.client.solrj.response.QueryResponse; |
||||
import org.apache.solr.common.SolrDocument; |
||||
import org.apache.solr.common.SolrDocumentList; |
||||
import org.apache.solr.common.params.*; |
||||
import org.springframework.beans.factory.annotation.Value; |
||||
import org.springframework.stereotype.Component; |
||||
|
||||
import java.io.IOException; |
||||
import java.util.*; |
||||
import java.util.function.Function; |
||||
import java.util.stream.Collectors; |
||||
|
||||
|
||||
/**
 * Solr query helper for compose analysis.
 *
 * NOTE: dedicated to solr synchronous updates; the solr address starts with "solr".
 * NOTE(review): not obviously thread-safe — {@code coreClientMap} is a plain
 * HashMap mutated in {@link #getClient}; confirm single-threaded use or guard it.
 *
 * @author liuzongren
 */
@Log4j2
@Component
@Data
public class SolrUtils {

    // base URL of the solr server, e.g. "http://host:8983/solr/"
    @Value("${solr.solrUrl}")
    private String clientUrl;

    // default page size for queries (kept as String because it is fed to solr params)
    @Value("${solr.row}")
    private String ROWS;

    // root path where uploaded source code is stored and unpacked
    @Value("${codeResourcePath}")
    private String codeResourcePath;

    private String fileAndFunSolrUrl;

    // client pool: one cached HttpSolrClient per core name
    private Map<String, HttpSolrClient> coreClientMap = new HashMap<>();


    /**
     * Returns a (lazily created, cached) solr client for the given core.
     *
     * @param coreName solr core (collection) name
     * @return todo it is unknown whether this client supports concurrent use; needs testing
     * @describe obtain a solr connection
     */
    public HttpSolrClient getClient(String coreName) {
        if (coreClientMap.containsKey(coreName)) {
            return coreClientMap.get(coreName);
        } else {
            // core name is appended directly to the base URL
            HttpSolrClient solr = new HttpSolrClient.Builder(clientUrl + "" + coreName)
                    .withConnectionTimeout(6000000)
                    .withSocketTimeout(6000000)
                    .allowCompression(true)
                    .build();
            coreClientMap.put(coreName, solr);
            return solr;
        }
    }


    /**
     * Simple query returning only the requested fields.
     *
     * @param coreName      solr core to query
     * @param searchContent query string (solr "q" parameter)
     * @param returneFields comma-separated field list (solr "fl" parameter)
     * @return the result documents, or {@code null} when the query failed or matched nothing
     */
    public SolrDocumentList query(String coreName, String searchContent, String returneFields) {
        SolrDocumentList docsList = null;
        try {
            HttpSolrClient client = getClient(coreName);
            Map<String, String> map = new HashMap<String, String>();
            map.put(CommonParams.Q, searchContent);
            map.put(CommonParams.FL, returneFields);
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS, ROWS);
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            if (!query.getResults().isEmpty()){
                docsList = query.getResults();
            }
        } catch (SolrServerException | IOException e) {
            log.error("solr查询失败,coreName:{},queryStr:{}", coreName, searchContent, e);
        }
        return docsList;
    }


    /**
     * Looks up, per source-file MD5, the version info from *_SourceFileBase.
     *
     * @param coreName         solr core name
     * @param originalFileMd5s file MD5s to search for
     * todo 1. extreme case: with very many files the response size is unbounded — verify
     * todo 2. dirTreeId is not selected here; the next step only resolves version info from
     *         VersionTree, not the concrete file entry
     * @return map from sourceFileMd5 to its first matching document
     */
    public Map<String,SolrDocument> batchQueryVersionIdFromSourceFileBaseBySourceMd5(String coreName, Set<String> originalFileMd5s) {
        String queryStr = "sourceFileMd5:(" + StringUtils.join(originalFileMd5s, " OR ") + ")";
        Map<String,SolrDocument> openFileMd5VersionIdMap = new HashMap<>();
        long strtTime = System.currentTimeMillis();
        log.info("batchQueryVersionIdFromSourceFileBaseBySourceMd5 queryStr:{},size:{}", queryStr, originalFileMd5s.size());
        try {
            HttpSolrClient client = getClient(coreName);
            Map<String, String> map = new HashMap<>();
            map.put(CommonParams.Q, queryStr);
            map.put(CommonParams.FL, "sourceFileMd5,versionId,fullPath");
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS, String.valueOf(originalFileMd5s.size()));
            // group by MD5 so that each open-source file is matched at most once
            // todo grouping still scans many documents; check for a cheaper first-match query
            map.put(GroupParams.GROUP,"true");
            map.put(GroupParams.GROUP_FIELD, "sourceFileMd5");
            map.put(GroupParams.GROUP_LIMIT,"1");
            map.put(GroupParams.GROUP_FORMAT,"simple");
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            if (query.getGroupResponse().getValues().size() > 0){
                // unwrap the "simple"-format group response: first command, first group
                SolrDocumentList result = query.getGroupResponse().getValues().get(0).getValues().get(0).getResult();
                openFileMd5VersionIdMap = result.stream().collect(Collectors.toMap(doc -> (String) doc.get("sourceFileMd5"), Function.identity()));
            }
        } catch (Exception e) {
            log.error("solr查询失败,coreName:{},queryStr:{}", coreName, queryStr, e);
        }
        log.info("batchQueryVersionIdFromSourceFileBaseBySourceMd5 cost:{}s", (System.currentTimeMillis()-strtTime) / 1000);
        return openFileMd5VersionIdMap;
    }


    /**
     * Single-result query returning only the requested fields.
     *
     * @param coreName      solr core to query
     * @param searchContent query string (solr "q" parameter)
     * @param returneFields comma-separated field list (solr "fl" parameter)
     * @return the first matching document, or {@code null} when none matched or the query failed
     */
    public SolrDocument queryOne(String coreName, String searchContent, String returneFields) {
        SolrDocument result = null;
        try {
            HttpSolrClient client = getClient(coreName);
            Map<String, String> map = new HashMap<String, String>();
            map.put(CommonParams.Q, searchContent);
            map.put(CommonParams.FL, returneFields);
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS, "1");
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            SolrDocumentList resp = query.getResults();
            if (CollectionUtils.isNotEmpty(resp)) {
                return resp.get(0);
            }
        } catch (SolrServerException | IOException e) {
            log.error("查询solr失败!,coreName:{},queryStr:{}",coreName , searchContent, e);
        }
        return result;
    }


    /**
     * Queries the VersionTree core and maps the first hit to a {@link VersionTree}.
     *
     * @param searchContent query string (solr "q" parameter)
     * @return the matched version tree, or {@code null} when none matched or the query failed
     */
    public VersionTree queryVersionTree(String searchContent) {
        String returneFields = "proId,proName,versionName,downUrl,licenseType,dirTree";
        VersionTree results = null;
        try {
            HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
            Map<String, String> map = new HashMap<String, String>();
            map.put(CommonParams.Q, searchContent);
            map.put(CommonParams.FL, returneFields);
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS, "1");
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            SolrDocumentList response = query.getResults();
            if (!response.isEmpty()) {
                // convert the raw solr document into the domain object
                results = BeanUtil.domToVersionTree(response.get(0));
            }
        } catch (SolrServerException | IOException e) {
            log.error("查询solr失败!,queryStr:{}" , searchContent, e);
        }
        return results;
    }


    /**
     * Queries the VersionTree core by version ID.
     *
     * @param versionId version ID to look up
     * @return the matched version tree, or {@code null} when none matched or the query failed
     */
    public VersionTree queryVersionTreeByVersionId(String versionId) {
        String returneFields = "proId,proName,versionName,downUrl,licenseType,dirTree";
        String queryStr = "versionId:"+ versionId;
        VersionTree results = null;
        try {
            HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
            Map<String, String> map = new HashMap<String, String>();
            map.put(CommonParams.Q, queryStr);
            map.put(CommonParams.FL, returneFields);
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS, "1");
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            SolrDocumentList response = query.getResults();
            // convert the raw solr document into the domain object
            if (!response.isEmpty()){
                results = BeanUtil.domToVersionTree(response.get(0));
            }else {
                log.error("根据版本ID查询VersionTree失败,versionId:{}" , versionId);
            }
        } catch (SolrServerException | IOException e) {
            log.error("查询solr失败!,queryStr:{}" , queryStr, e);
        }
        return results;
    }


    /**
     * Queries version details (without the directory tree) by version ID.
     *
     * @param versionId version ID to look up
     * @return a populated {@link VersionTree}; an EMPTY (not null) instance when
     *         nothing matched or the query failed — callers should check the fields
     */
    public VersionTree queryVersionInfoByVersionId(Object versionId) {
        String returneFields = "proId,proName,versionName,downUrl,licenseType";
        VersionTree result = new VersionTree();
        try {
            HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
            Map<String, String> map = new HashMap<String, String>();
            map.put(CommonParams.Q, "versionId:" + versionId);
            map.put(CommonParams.FL, returneFields);
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS, "1");
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            SolrDocumentList response = query.getResults();
            // copy matching fields onto the domain object
            if (CollectionUtils.isNotEmpty(response)) {
                cn.hutool.core.bean.BeanUtil.copyProperties(response.get(0), result);
                // licenseType may be absent in solr; normalize null to ""
                result.setLicenseType(response.get(0).get("licenseType") == null ? "" : response.get(0).get("licenseType").toString());
            }else {
                log.error("根据版本ID查询版本信息失败,versionId:{}" , versionId);
            }
        } catch (SolrServerException | IOException e) {
            log.error("查询solr失败!,queryStr:{}" , versionId, e);
        }
        return result;
    }


    /**
     * Batch-queries version details for the given version IDs.
     *
     * @param versionIds version IDs (duplicates are tolerated and removed)
     * @return matching version infos; empty list for empty input or on failure
     */
    public List<VersionTree> queryBatchVersionInfoByVersionIds(Collection<String> versionIds) {
        List<VersionTree> results = new ArrayList<>();
        if (CollectionUtils.isEmpty(versionIds)) {
            return results;
        }
        // de-duplicate before building the OR query
        versionIds = versionIds.stream().collect(Collectors.toSet());
        String queryStr = "versionId:(" + StringUtils.join(versionIds, " OR ") + ")";
        String returneFields = "versionId,proId,proName,versionName,downUrl,licenseType";
        try {
            HttpSolrClient client = getClient(MongoDBConst.VERSION_TREE);
            Map<String, String> map = new HashMap<String, String>();
            map.put(CommonParams.Q, queryStr);
            map.put(CommonParams.FL, returneFields);
            map.put(CommonParams.START, "0");
            map.put(CommonParams.ROWS,String.valueOf(versionIds.size()));
            SolrParams params = new MapSolrParams(map);
            QueryResponse query = client.query(params, SolrRequest.METHOD.POST);
            SolrDocumentList response = query.getResults();
            // copy each document onto a domain object
            if (!response.isEmpty()) {
                for (int i = 0; i < response.size(); i++) {
                    VersionTree versionTree = new VersionTree();
                    try {
                        cn.hutool.core.bean.BeanUtil.copyProperties(response.get(i), versionTree);
                        // licenseType may be absent in solr; normalize null to ""
                        versionTree.setLicenseType(response.get(i).get("licenseType") == null ? "" : response.get(i).get("licenseType").toString());
                        results.add(versionTree);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
        } catch (SolrServerException | IOException e) {
            log.error("查询solr失败!,queryStr:{}" , queryStr, e);
        }
        return results;
    }

}
@ -0,0 +1,53 @@ |
||||
package com.keyware.composeanalysis.util; |
||||
|
||||
import org.springframework.beans.BeansException; |
||||
import org.springframework.context.ApplicationContext; |
||||
import org.springframework.context.ApplicationContextAware; |
||||
import org.springframework.stereotype.Component; |
||||
|
||||
@Component |
||||
public class SpringContextUtils implements ApplicationContextAware { |
||||
|
||||
/** |
||||
* 上下文对象实例 |
||||
*/ |
||||
private static ApplicationContext applicationContext; |
||||
|
||||
@Override |
||||
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { |
||||
SpringContextUtils.applicationContext = applicationContext; |
||||
} |
||||
|
||||
/** |
||||
* 获取applicationContext |
||||
*/ |
||||
public static ApplicationContext getApplicationContext() { |
||||
//判断是否为null
|
||||
if (applicationContext == null) { |
||||
throw new IllegalStateException("applicaitonContext未注入,请在applicationContext.xml中定义SpringContextHolder."); |
||||
} |
||||
return applicationContext; |
||||
} |
||||
|
||||
/** |
||||
* 通过name获取Bean |
||||
*/ |
||||
public static Object getBean(String name) { |
||||
return getApplicationContext().getBean(name); |
||||
} |
||||
|
||||
/** |
||||
* 通过class获取Bean |
||||
*/ |
||||
public static <T> T getBean(Class<T> clazz) { |
||||
return getApplicationContext().getBean(clazz); |
||||
} |
||||
|
||||
/** |
||||
* 通过name和class获取Bean |
||||
*/ |
||||
public static <T> T getBean(String name, Class<T> clazz) { |
||||
return getApplicationContext().getBean(name, clazz); |
||||
} |
||||
|
||||
} |
@ -0,0 +1,18 @@ |
||||
server: |
||||
port: 8001 |
||||
|
||||
spring: |
||||
application: |
||||
name: compose-analysis-service |
||||
cloud: |
||||
nacos: |
||||
discovery: |
||||
server-addr: 172.16.36.100:8848 |
||||
namespace: 7f9bb282-8ee3-4948-8182-24b7dcadcd5a |
||||
config: |
||||
server-addr: 172.16.36.100:8848 |
||||
namespace: 7f9bb282-8ee3-4948-8182-24b7dcadcd5a |
||||
group: dev_group |
||||
file-extension: yaml |
||||
config: |
||||
import: nacos:compose-analysis-dev.yaml |
@ -0,0 +1,215 @@ |
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<!-- 分级别异步文件日志输出配置 --> |
||||
<!-- 级别从高到低 OFF 、 FATAL 、 ERROR 、 WARN 、 INFO 、 DEBUG 、 TRACE 、 ALL --> |
||||
<!-- 日志输出规则 根据当前ROOT 级别,日志输出时,级别高于root默认的级别时 会输出 --> |
||||
<!-- 以下 每个配置的 filter 是过滤掉输出文件里面,会出现高级别文件,依然出现低级别的日志信息,通过filter 过滤只记录本级别的日志 --> |
||||
<!-- scan 当此属性设置为true时,配置文件如果发生改变,将会被重新加载,默认值为true。 --> |
||||
<!-- scanPeriod 设置监测配置文件是否有修改的时间间隔,如果没有给出时间单位,默认单位是毫秒。当scan为true时,此属性生效。默认的时间间隔为1分钟。 --> |
||||
<!-- debug 当此属性设置为true时,将打印出logback内部日志信息,实时查看logback运行状态。默认值为false。 --> |
||||
<configuration scan="true" scanPeriod="60 seconds" debug="false"> |
||||
|
||||
<!-- 关闭无用日志--> |
||||
<statusListener class="ch.qos.logback.core.status.NopStatusListener" /> |
||||
|
||||
<!-- 引入spirng boot默认的logback配置文件 --> |
||||
<include resource="org/springframework/boot/logging/logback/defaults.xml"/> |
||||
|
||||
<springProperty scope="context" name="springAppName" source="spring.application.name"/> |
||||
|
||||
<!-- 日志路径--> |
||||
<property name="logPath" value="./logs/"/> |
||||
|
||||
<!-- logback项目名称 --> |
||||
<property name="appName" value="${springAppName}"/> |
||||
|
||||
<!-- 日志级别 DEBUGER INFO WARN ERROR --> |
||||
<property name="logLevel" value="INFO"></property> |
||||
|
||||
|
||||
<!-- 最大保存时间 60天--> |
||||
<property name="maxHistory" value="60"/> |
||||
|
||||
<!-- 异步缓冲队列的深度,该值会影响性能.默认值为256 --> |
||||
<property name="queueSize" value="512"></property> |
||||
|
||||
|
||||
<!-- lOGGER PATTERN 根据个人喜好选择匹配 --> |
||||
<property name="logPattern" value="[ %-5level] [%date{yyyy-MM-dd HH:mm:ss.SSS}] %logger{36} [%line] [%thread]- %msg%n"></property> |
||||
<!-- %d{yyyy-MM-dd HH:mm:ss.SSS} [%-5level] %logger - %msg%n --> |
||||
<!-- %d{yyyy-MM-dd HH:mm:ss} %-4relative [%thread] %-5level %logger{35} - %msg %n --> |
||||
<!-- [ %-5level] [%date{yyyy-MM-dd HH:mm:ss.SSS}] %logger{96} [%line] [%thread]- %msg%n --> |
||||
|
||||
<!-- 动态日志级别 --> |
||||
<jmxConfigurator/> |
||||
|
||||
<!-- 控制台的标准输出 --> |
||||
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> |
||||
<!--此日志appender是为开发使用,只配置最底级别,控制台输出的日志级别是大于或等于此级别的日志信息--> |
||||
<filter class="ch.qos.logback.classic.filter.ThresholdFilter"> |
||||
<level>debug</level> |
||||
</filter> |
||||
<encoder> |
||||
<charset>UTF-8</charset> |
||||
<!-- 控制台输出使用默认的输出模版(可以彩色打印)--> |
||||
<pattern>${CONSOLE_LOG_PATTERN}</pattern> |
||||
</encoder> |
||||
</appender> |
||||
|
||||
<!-- DEBUG 日志记录 -->
||||
<appender name="FILE_DEBUG" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
<file>${logPath}/debug/${appName}_debug.log</file> |
||||
<!-- 日志记录器的滚动策略,按日期,按大小记录 --> |
||||
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> |
||||
<!-- 归档的日志文件的路径,例如今天是2022-11-06日志,当前写的日志文件路径为file节点指定, |
||||
可以将此文件与file指定文件路径设置为不同路径,从而将当前日志文件或归档日志文件置不同的目录。 |
||||
而2022-11-06的日志文件在由fileNamePattern指定。%d{yyyy-MM-dd}指定日期格式,%i指定索引 --> |
||||
<fileNamePattern>${logPath}/debug/${appName}_debug-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern> |
||||
<maxFileSize>128MB</maxFileSize> |
||||
<maxHistory>${maxHistory}</maxHistory> |
||||
<totalSizeCap>10GB</totalSizeCap> |
||||
</rollingPolicy> |
||||
<encoder> |
||||
<pattern>${logPattern}</pattern> |
||||
<charset>utf-8</charset> |
||||
</encoder> |
||||
<filter class="ch.qos.logback.classic.filter.LevelFilter"> |
||||
<level>DEBUG</level> |
||||
<onMatch>ACCEPT</onMatch> |
||||
<onMismatch>DENY</onMismatch> |
||||
</filter> |
||||
</appender> |
||||
|
||||
<!-- INFO 级别的日志记录 --> |
||||
<appender name="FILE_INFO" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
<file>${logPath}/info/${appName}_info.log</file> |
||||
<!-- 日志记录器的滚动策略,按日期,按大小记录 --> |
||||
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> |
||||
<!-- 归档的日志文件的路径,例如今天是2022-11-06日志,当前写的日志文件路径为file节点指定, |
||||
可以将此文件与file指定文件路径设置为不同路径,从而将当前日志文件或归档日志文件置不同的目录。 |
||||
而2022-11-06的日志文件在由fileNamePattern指定。%d{yyyy-MM-dd}指定日期格式,%i指定索引 --> |
||||
<fileNamePattern>${logPath}/info/${appName}_info-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern> |
||||
<maxFileSize>128MB</maxFileSize> |
||||
<maxHistory>${maxHistory}</maxHistory> |
||||
<totalSizeCap>10GB</totalSizeCap> |
||||
</rollingPolicy> |
||||
<encoder> |
||||
<pattern>${logPattern}</pattern> |
||||
<charset>utf-8</charset> |
||||
</encoder> |
||||
<filter class="ch.qos.logback.classic.filter.LevelFilter"> |
||||
<level>INFO</level> |
||||
<onMatch>ACCEPT</onMatch> |
||||
<onMismatch>DENY</onMismatch> |
||||
</filter> |
||||
</appender> |
||||
|
||||
<!-- WARN 级别的日志记录 --> |
||||
<appender name="FILE_WARN" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
<file>${logPath}/warn/${appName}_warn.log</file> |
||||
<!-- 日志记录器的滚动策略,按日期,按大小记录 --> |
||||
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> |
||||
<!-- 归档的日志文件的路径,例如今天是2022-11-06日志,当前写的日志文件路径为file节点指定, |
||||
可以将此文件与file指定文件路径设置为不同路径,从而将当前日志文件或归档日志文件置不同的目录。 |
||||
而2022-11-06的日志文件在由fileNamePattern指定。%d{yyyy-MM-dd}指定日期格式,%i指定索引 --> |
||||
<fileNamePattern>${logPath}/warn/${appName}_warn-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern> |
||||
<maxFileSize>128MB</maxFileSize> |
||||
<maxHistory>${maxHistory}</maxHistory> |
||||
<totalSizeCap>10GB</totalSizeCap> |
||||
</rollingPolicy> |
||||
<encoder> |
||||
<pattern>${logPattern}</pattern> |
||||
<charset>utf-8</charset> |
||||
</encoder> |
||||
<filter class="ch.qos.logback.classic.filter.LevelFilter"> |
||||
<level>WARN</level> |
||||
<onMatch>ACCEPT</onMatch> |
||||
<onMismatch>DENY</onMismatch> |
||||
</filter> |
||||
</appender> |
||||
|
||||
<!-- Error 级别的日志记录 --> |
||||
<appender name="FILE_ERROR" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
<file>${logPath}/error/${appName}_error.log</file> |
||||
<!-- 日志记录器的滚动策略,按日期,按大小记录 --> |
||||
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> |
||||
<!-- 归档的日志文件的路径,例如今天是2022-11-06日志,当前写的日志文件路径为file节点指定, |
||||
可以将此文件与file指定文件路径设置为不同路径,从而将当前日志文件或归档日志文件置不同的目录。 |
||||
而2022-11-06的日志文件在由fileNamePattern指定。%d{yyyy-MM-dd}指定日期格式,%i指定索引 --> |
||||
<fileNamePattern>${logPath}/error/${appName}_error-%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern> |
||||
<maxFileSize>128MB</maxFileSize> |
||||
<maxHistory>${maxHistory}</maxHistory> |
||||
<totalSizeCap>10GB</totalSizeCap> |
||||
</rollingPolicy> |
||||
<encoder> |
||||
<pattern>${logPattern}</pattern> |
||||
<charset>utf-8</charset> |
||||
</encoder> |
||||
<filter class="ch.qos.logback.classic.filter.LevelFilter"> |
||||
<level>ERROR</level> |
||||
<onMatch>ACCEPT</onMatch> |
||||
<onMismatch>DENY</onMismatch> |
||||
</filter> |
||||
</appender> |
||||
|
||||
<!-- ASYNC_LOG_DEBUG --> |
||||
<appender name="ASYNC_LOG_DEBUG" class="ch.qos.logback.classic.AsyncAppender"> |
||||
<!-- 不丢失日志.默认的,如果队列的80%已满,则会丢弃TRACE、DEBUG、INFO级别的日志 -->
||||
<discardingThreshold>0</discardingThreshold> |
||||
<!-- 更改默认的队列的深度,该值会影响性能.默认值为256 --> |
||||
<queueSize>${queueSize}</queueSize> |
||||
<!-- 设置该属性 logback 会使用 ArrayBlockingQueue 的非阻塞方法 offer 代替 put, 防止在队列满时阻塞业务线程 --> |
||||
<neverBlock>true</neverBlock> |
||||
<appender-ref ref="FILE_DEBUG"/> |
||||
</appender> |
||||
|
||||
<!-- ASYNC_LOG_INFO --> |
||||
<appender name="ASYNC_LOG_INFO" class="ch.qos.logback.classic.AsyncAppender"> |
||||
<!-- 不丢失日志.默认的,如果队列的80%已满,则会丢弃TRACE、DEBUG、INFO级别的日志 -->
||||
<discardingThreshold>0</discardingThreshold> |
||||
<!-- 更改默认的队列的深度,该值会影响性能.默认值为256 --> |
||||
<queueSize>${queueSize}</queueSize> |
||||
<!-- 设置该属性 logback 会使用 ArrayBlockingQueue 的非阻塞方法 offer 代替 put, 防止在队列满时阻塞业务线程 --> |
||||
<neverBlock>true</neverBlock> |
||||
<appender-ref ref="FILE_INFO"/> |
||||
</appender> |
||||
|
||||
<!-- ASYNC_LOG_WARN --> |
||||
<appender name="ASYNC_LOG_WARN" class="ch.qos.logback.classic.AsyncAppender"> |
||||
<!-- 不丢失日志.默认的,如果队列的80%已满,则会丢弃TRACE、DEBUG、INFO级别的日志 -->
||||
<discardingThreshold>0</discardingThreshold> |
||||
<!-- 更改默认的队列的深度,该值会影响性能.默认值为256 --> |
||||
<queueSize>${queueSize}</queueSize> |
||||
<!-- 设置该属性 logback 会使用 ArrayBlockingQueue 的非阻塞方法 offer 代替 put, 防止在队列满时阻塞业务线程 --> |
||||
<neverBlock>true</neverBlock> |
||||
<appender-ref ref="FILE_WARN"/> |
||||
</appender> |
||||
|
||||
<!--ASYNC_LOG_ERROR --> |
||||
<appender name="ASYNC_LOG_ERROR" class="ch.qos.logback.classic.AsyncAppender"> |
||||
<!-- 不丢失日志.默认的,如果队列的80%已满,则会丢弃TRACE、DEBUG、INFO级别的日志 -->
||||
<discardingThreshold>0</discardingThreshold> |
||||
<!-- 更改默认的队列的深度,该值会影响性能.默认值为256 --> |
||||
<queueSize>${queueSize}</queueSize> |
||||
<!-- 设置该属性 logback 会使用 ArrayBlockingQueue 的非阻塞方法 offer 代替 put, 防止在队列满时阻塞业务线程 --> |
||||
<neverBlock>true</neverBlock> |
||||
<appender-ref ref="FILE_ERROR"/> |
||||
</appender> |
||||
|
||||
|
||||
<!-- <logger name="com.keyware.composeanalysis" level ="DEBUG">--> |
||||
<!-- <!– 引用的appender,类似于spring的ref –>--> |
||||
<!-- <appender-ref ref="CONSOLE" />--> |
||||
<!-- </logger>--> |
||||
|
||||
|
||||
|
||||
<!-- 在定义后引用APPENDER --> |
||||
<!-- <root level="DEBUG">--> |
||||
<root level="INFO"> |
||||
<appender-ref ref="CONSOLE" /> |
||||
<appender-ref ref="ASYNC_LOG_DEBUG"/> |
||||
<appender-ref ref="ASYNC_LOG_INFO"/> |
||||
<appender-ref ref="ASYNC_LOG_WARN"/> |
||||
<appender-ref ref="ASYNC_LOG_ERROR"/> |
||||
</root> |
||||
</configuration> |
@ -0,0 +1,25 @@ |
||||
<?xml version="1.0" encoding="UTF-8"?> |
||||
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> |
||||
<mapper namespace="com.keyware.composeanalysis.mapper.AnalyzeTaskMapper"> |
||||
|
||||
<!-- 通用查询映射结果 --> |
||||
<resultMap id="BaseResultMap" type="com.keyware.composeanalysis.entity.AnalysisTask"> |
||||
<id column="id" property="id" /> |
||||
<result column="file_name" property="fileName" /> |
||||
<result column="version" property="version" /> |
||||
<result column="open_rate_threshold" property="openRateThreshold" /> |
||||
<result column="open_type" property="openType" /> |
||||
<result column="md5" property="md5" /> |
||||
<result column="analysis_status" property="analysisStatus" /> |
||||
<result column="analysis_start_time" property="analysisStartTime" /> |
||||
<result column="analysis_end_time" property="analysisEndTime" /> |
||||
<result column="compose_flag" property="composeFlag" /> |
||||
<result column="assembly_flag" property="assemblyFlag" /> |
||||
<result column="hold_flag" property="holdFlag" /> |
||||
<result column="licence_flag" property="licenceFlag" /> |
||||
<result column="decompression_flag" property="decompressionFlag" /> |
||||
<result column="create_time" property="createTime" /> |
||||
<result column="create_user_id" property="createUserId" /> |
||||
</resultMap> |
||||
|
||||
</mapper> |
Loading…
Reference in new issue