|
|
@ -3,9 +3,11 @@ package com.keyware.composeanalysis.task; |
|
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.collection.CollUtil; |
|
|
|
import cn.hutool.core.collection.CollUtil; |
|
|
|
import cn.hutool.core.collection.CollectionUtil; |
|
|
|
import cn.hutool.core.collection.CollectionUtil; |
|
|
|
|
|
|
|
import cn.hutool.core.io.FileUtil; |
|
|
|
import cn.hutool.core.lang.Pair; |
|
|
|
import cn.hutool.core.lang.Pair; |
|
|
|
import com.alibaba.fastjson.JSONArray; |
|
|
|
import com.alibaba.fastjson.JSONArray; |
|
|
|
import com.keyware.common.constant.enums.AnalysisStatusEnum; |
|
|
|
import com.keyware.common.constant.enums.AnalysisStatusEnum; |
|
|
|
|
|
|
|
import com.keyware.common.exception.BusinessException; |
|
|
|
import com.keyware.composeanalysis.constant.FixedValue; |
|
|
|
import com.keyware.composeanalysis.constant.FixedValue; |
|
|
|
import com.keyware.composeanalysis.constant.RedisConst; |
|
|
|
import com.keyware.composeanalysis.constant.RedisConst; |
|
|
|
import com.keyware.composeanalysis.constant.SolrDBConst; |
|
|
|
import com.keyware.composeanalysis.constant.SolrDBConst; |
|
|
@ -16,6 +18,7 @@ import com.keyware.composeanalysis.mongo.FileDataMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.LineDataMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.LineDataMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFile; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
|
|
|
import com.keyware.composeanalysis.mongo.MatchOpenFileMongoDto; |
|
|
|
|
|
|
|
import com.keyware.composeanalysis.solr.FunctionInfo; |
|
|
|
import com.keyware.composeanalysis.solr.VersionTree; |
|
|
|
import com.keyware.composeanalysis.solr.VersionTree; |
|
|
|
import com.keyware.composeanalysis.util.*; |
|
|
|
import com.keyware.composeanalysis.util.*; |
|
|
|
import com.keyware.keyswan.common.LineModel; |
|
|
|
import com.keyware.keyswan.common.LineModel; |
|
|
@ -101,7 +104,11 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); |
|
|
|
String sourceFileBaseCoreName = FixedValue.SUFFIX_SOLR_VERSION.get(analysisFile.getSuffix()); |
|
|
|
|
|
|
|
|
|
|
|
//根据文件的名称获取函数解析器
|
|
|
|
//根据文件的名称获取函数解析器
|
|
|
|
Analysis analysis = AnalysisFactory.getAnalysis(filePath); |
|
|
|
Analysis analysis = AnalysisFactory.getAnalysis(fileName); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (analysis == null){ |
|
|
|
|
|
|
|
throw new BusinessException("获取文件解析器失败,文件名称:"+fileName); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//解析文件
|
|
|
|
//解析文件
|
|
|
|
CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath)); |
|
|
|
CodeFile codeFile = analysis.analysisFile(new FileInputStream(filePath)); |
|
|
@ -123,7 +130,7 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
log.info("文件" + fileName + ":函数级分析完成"); |
|
|
|
log.info("文件" + fileName + ":函数级分析完成"); |
|
|
|
} catch (Exception e) { |
|
|
|
} catch (Exception e) { |
|
|
|
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e); |
|
|
|
AnalysisLogUtil.insertErrorInfo(mongoTemplate, "【函数级级分析】失败" + fileName, e); |
|
|
|
log.error("文件:" + fileName + "函数级别特征提取失败!", e); |
|
|
|
log.error("文件:" + fileName + "【函数级级分析】失败!", e); |
|
|
|
//修改当前文件分析状态未失败
|
|
|
|
//修改当前文件分析状态未失败
|
|
|
|
mongoTemplate.update(FileDataMongoDto.class) |
|
|
|
mongoTemplate.update(FileDataMongoDto.class) |
|
|
|
.matching(where("_id").is(analysisFile.getId())) |
|
|
|
.matching(where("_id").is(analysisFile.getId())) |
|
|
@ -152,7 +159,7 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5)); |
|
|
|
Map<String, List<Function>> featureMd5FunctionMap = fileAnalysisRes.getFunctionList().stream().collect(Collectors.groupingBy(Function::getMd5)); |
|
|
|
|
|
|
|
|
|
|
|
//函数代码总函数
|
|
|
|
//函数代码总函数
|
|
|
|
int totalFunctionLineCount = fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum(); |
|
|
|
BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum()); |
|
|
|
|
|
|
|
|
|
|
|
//匹配到的特征函数Md5
|
|
|
|
//匹配到的特征函数Md5
|
|
|
|
Set<String> matchFeatureFunctionMd5s = new HashSet(); |
|
|
|
Set<String> matchFeatureFunctionMd5s = new HashSet(); |
|
|
@ -169,7 +176,7 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum(); |
|
|
|
matchFunctionLineCount += featureMd5FunctionMap.get(matchFeatureFunctionMd5).stream().mapToInt(Function::getCodeRowNum).sum(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(new BigDecimal(totalFunctionLineCount), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
BigDecimal featureSimilarity = new BigDecimal(matchFunctionLineCount).divide(totalFunctionLineCount, 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
|
|
|
|
|
|
|
|
//计算文件的总体开源率
|
|
|
|
//计算文件的总体开源率
|
|
|
|
BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
|
BigDecimal openRate = new BigDecimal(matchOpenLineRowsNum.size()).divide(new BigDecimal(analysisFile.getCodeRowNum()), 4, RoundingMode.HALF_UP).multiply(new BigDecimal(100)).setScale(2); |
|
|
@ -182,16 +189,19 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
analysisFile.setOpenType(true); |
|
|
|
analysisFile.setOpenType(true); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//保存当前文件开源行数
|
|
|
|
|
|
|
|
analysisFile.setOpenLineCount(matchOpenLineRowsNum.size()); |
|
|
|
|
|
|
|
|
|
|
|
//保存当前文件的开源信息到mongo库中
|
|
|
|
//保存当前文件的开源信息到mongo库中
|
|
|
|
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); |
|
|
|
MatchOpenFileMongoDto matchOpenFileMongo = new MatchOpenFileMongoDto(); |
|
|
|
matchOpenFileMongo.setId(IdGenerator.uuid32()) |
|
|
|
matchOpenFileMongo.setId(IdGenerator.uuid32()) |
|
|
|
.setFilePath(analysisFile.getFileUrl()) |
|
|
|
|
|
|
|
.setFileName(analysisFile.getName()) |
|
|
|
.setFileName(analysisFile.getName()) |
|
|
|
|
|
|
|
.setFilePath(analysisFile.getFileUrl()) |
|
|
|
|
|
|
|
.setOpenType(analysisFile.getOpenType()) |
|
|
|
.setFeatureSimilarity(featureSimilarity.floatValue()) |
|
|
|
.setFeatureSimilarity(featureSimilarity.floatValue()) |
|
|
|
.setOpenRate(openRate.floatValue()) |
|
|
|
.setOpenRate(openRate.floatValue()) |
|
|
|
.setOpenType(analysisFile.getOpenType()) |
|
|
|
.setAnalysisType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()) |
|
|
|
.setMatchOpenFile(matchOpenFilesRes); |
|
|
|
.setSubMatchOpenFiles(matchOpenFilesRes); |
|
|
|
|
|
|
|
|
|
|
|
mongoTemplate.save(matchOpenFileMongo); |
|
|
|
mongoTemplate.save(matchOpenFileMongo); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -226,13 +236,20 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
//函数总行数
|
|
|
|
//函数总行数
|
|
|
|
BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum()); |
|
|
|
BigDecimal totalFunctionLineCount = new BigDecimal(fileAnalysisRes.getFunctionList().stream().mapToInt(Function::getCodeRowNum).sum()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//被测件文本内容
|
|
|
|
|
|
|
|
String sourcefileContent= FileUtil.readUtf8String(analysisFile.getFileUrl()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//将文本内容解析成行信息,用于后续文件的开源率计算
|
|
|
|
|
|
|
|
List<String> analysisFileLineInfo = SimilarityUtil.getSplitWords(sourcefileContent); |
|
|
|
|
|
|
|
|
|
|
|
for (SolrDocument openSourceFile : matchOpenFiles) { |
|
|
|
for (SolrDocument openSourceFile : matchOpenFiles) { |
|
|
|
|
|
|
|
|
|
|
|
//开源文件md5
|
|
|
|
//开源文件md5
|
|
|
|
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString(); |
|
|
|
String openSourceFileMd5 = openSourceFile.getFieldValue("sourceMd5").toString(); |
|
|
|
|
|
|
|
|
|
|
|
//解析文件的函数特征值
|
|
|
|
//解析文件的函数特征值
|
|
|
|
List<Function> openFileFunctionList = getOpenFileFunctionList(openSourceFile); |
|
|
|
List<FunctionInfo> openFileFunctionList = getOpenFileFunctionList(openSourceFile); |
|
|
|
|
|
|
|
|
|
|
|
//根据源文件的MD5确定需要查询源码库的序号
|
|
|
|
//根据源文件的MD5确定需要查询源码库的序号
|
|
|
|
String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; |
|
|
|
String openSourceCodeCoreIndex = openSourceFileMd5.substring(0, 1) + SolrDBConst.CORE_NAME_SUFFIX_SOURCE_FILE_INFO; |
|
|
@ -250,8 +267,8 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) { |
|
|
|
for (String funFeatureMd5 : featureMd5FunctionMap.keySet()) { |
|
|
|
List<Function> currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5); |
|
|
|
List<Function> currentFueatureFunctionList = featureMd5FunctionMap.get(funFeatureMd5); |
|
|
|
//源文件的特征函数列表
|
|
|
|
//源文件的特征函数列表
|
|
|
|
for (Function openFunction : openFileFunctionList) { |
|
|
|
for (FunctionInfo openFunction : openFileFunctionList) { |
|
|
|
if (funFeatureMd5.equals(openFunction.getMd5())) { |
|
|
|
if (funFeatureMd5.equals(openFunction.getTraitFunMd5())) { |
|
|
|
//每个特征函数 不能多次匹配,影响整体特征相似度
|
|
|
|
//每个特征函数 不能多次匹配,影响整体特征相似度
|
|
|
|
//匹配成功后,相同的特征行 一并加上
|
|
|
|
//匹配成功后,相同的特征行 一并加上
|
|
|
|
if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) { |
|
|
|
if (!currentFileMatchFeatureFunctionMd5.contains(funFeatureMd5)) { |
|
|
@ -264,7 +281,7 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//当前文件的开源率
|
|
|
|
//当前文件的开源率
|
|
|
|
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(new String(fileAnalysisRes.getFileContent()), openSourceContent.getFieldValue("sourceContent").toString()); |
|
|
|
Pair<Float, HashSet<Integer>> openRateAndSaveRowNum = SimilarityUtil.getOpenRateAndSaveRowNum(analysisFileLineInfo, openSourceContent.getFieldValue("sourceContent").toString()); |
|
|
|
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
|
|
|
|
//将当前文件匹配的行号,存储到缓存中,方便统计整体的开源率
|
|
|
|
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); |
|
|
|
matchLineRowsNum.addAll(openRateAndSaveRowNum.getValue()); |
|
|
|
|
|
|
|
|
|
|
@ -273,16 +290,22 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
|
|
|
|
|
|
|
|
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); |
|
|
|
SolrDocument openEntries = md5VersionInfoMap.get(openSourceFileMd5); |
|
|
|
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); |
|
|
|
VersionTree versionInfo = versionIdVersionInfoMap.get(openEntries.get("versionId")); |
|
|
|
|
|
|
|
String openFilePath = (String) md5VersionInfoMap.get(openSourceFileMd5).getFieldValue("fullPath"); |
|
|
|
|
|
|
|
|
|
|
|
//组装当前开源文件的开源项目信息
|
|
|
|
//组装当前开源文件的开源项目信息
|
|
|
|
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); |
|
|
|
MatchOpenFile matchOpenFileInfo = new MatchOpenFile(); |
|
|
|
matchOpenFileInfo.setPId(versionInfo.getProId()) |
|
|
|
matchOpenFileInfo.setId(IdGenerator.uuid32()) |
|
|
|
|
|
|
|
.setFileName(FileUtil.getName(openFilePath)) |
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
.setPName(versionInfo.getProName()) |
|
|
|
.setSourceUrl((String) openEntries.get("fullPath")) |
|
|
|
.setPId(versionInfo.getProId()) |
|
|
|
.setFeatureSimilarity(featureSimilarity.floatValue()) |
|
|
|
|
|
|
|
.setOpenRate(openRateAndSaveRowNum.getKey()) |
|
|
|
|
|
|
|
.setVersion(versionInfo.getVersionName()) |
|
|
|
.setVersion(versionInfo.getVersionName()) |
|
|
|
|
|
|
|
.setVersionId(versionInfo.getVersionId()) |
|
|
|
|
|
|
|
.setSourceFilePath(openFilePath) |
|
|
|
|
|
|
|
.setSourceUrl(versionInfo.getDownUrl()) |
|
|
|
.setLicenseType(versionInfo.getLicenseType()) |
|
|
|
.setLicenseType(versionInfo.getLicenseType()) |
|
|
|
.setAnalyzeType(AnalysisLevelEnum.FUNCTION_LEVEL.getCode()); |
|
|
|
.setFeatureSimilarity(featureSimilarity.floatValue()) |
|
|
|
|
|
|
|
.setOpenRate(openRateAndSaveRowNum.getKey()) |
|
|
|
|
|
|
|
.setMd5(openSourceFileMd5); |
|
|
|
matchOpenFilesRes.add(matchOpenFileInfo); |
|
|
|
matchOpenFilesRes.add(matchOpenFileInfo); |
|
|
|
} |
|
|
|
} |
|
|
|
return matchOpenFilesRes; |
|
|
|
return matchOpenFilesRes; |
|
|
@ -316,18 +339,20 @@ public class FunctionAnalysisTask extends IAnalysisTask { |
|
|
|
* @param matchOpenFile |
|
|
|
* @param matchOpenFile |
|
|
|
* @return |
|
|
|
* @return |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
private List<Function> getOpenFileFunctionList(SolrDocument matchOpenFile) { |
|
|
|
private List<FunctionInfo> getOpenFileFunctionList(SolrDocument matchOpenFile) { |
|
|
|
try { |
|
|
|
try { |
|
|
|
//解析文件的函数特征值
|
|
|
|
//解析文件的函数特征值
|
|
|
|
String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString(); |
|
|
|
String lineFeatureMd5s = matchOpenFile.getFieldValue("fun_hay").toString(); |
|
|
|
lineFeatureMd5s = lineFeatureMd5s.replace("\\", "") |
|
|
|
lineFeatureMd5s = lineFeatureMd5s |
|
|
|
|
|
|
|
.replace("\\\\\\\"", "") |
|
|
|
|
|
|
|
.replace("\\", "") |
|
|
|
.replace("\"{", "{") |
|
|
|
.replace("\"{", "{") |
|
|
|
.replace("}\"", "}"); |
|
|
|
.replace("}\"", "}"); |
|
|
|
return JSONArray.parseArray(lineFeatureMd5s, Function.class); |
|
|
|
return JSONArray.parseArray(lineFeatureMd5s, FunctionInfo.class); |
|
|
|
} catch (Exception e) { |
|
|
|
} catch (Exception e) { |
|
|
|
log.error("解析文件特征值失败", e); |
|
|
|
log.error("解析文件特征值失败", e); |
|
|
|
} |
|
|
|
} |
|
|
|
return new ArrayList<Function>(); |
|
|
|
return new ArrayList<>(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
/** |
|
|
|