优化:文件进行ES写入逻辑

master
guoxin 1 year ago
parent eef9d53218
commit 7a04220205
  1. 21
      shandan-bianmu/src/main/java/com/keyware/shandan/bianmu/es/consumer/EsSysFileQueueConsumer.java
  2. 72
      shandan-common/src/main/java/com/keyware/shandan/common/util/PoiFileReadUtil.java

@ -84,6 +84,27 @@ public class EsSysFileQueueConsumer extends Thread {
String targetNumber = DictUtil.getDictName("target_type", file.getTargetNumber()); String targetNumber = DictUtil.getDictName("target_type", file.getTargetNumber());
file.setTargetNumber(StringUtils.hasText(targetNumber) ? targetNumber : file.getTargetNumber()); file.setTargetNumber(StringUtils.hasText(targetNumber) ? targetNumber : file.getTargetNumber());
} }
// For each coded field that has text: look the code up through DictUtil.getDictName and
// overwrite the field with the result, keeping the stored value when the lookup is blank.
if (StringUtils.hasText(file.getEquipmentModel())) {
    String value = DictUtil.getDictName("equipment_model", file.getEquipmentModel());
    file.setEquipmentModel(StringUtils.hasText(value) ? value : file.getEquipmentModel());
}
if (StringUtils.hasText(file.getTaskCode())) {
    String value = DictUtil.getDictName("task_code", file.getTaskCode());
    file.setTaskCode(StringUtils.hasText(value) ? value : file.getTaskCode());
}
if (StringUtils.hasText(file.getTroopCode())) {
    // Fix: the lookup key must be the troop code. The task code was passed here before,
    // and by this point it has already been overwritten by the lookup above.
    String value = DictUtil.getDictName("troop_code", file.getTroopCode());
    file.setTroopCode(StringUtils.hasText(value) ? value : file.getTroopCode());
}
if (StringUtils.hasText(file.getTargetNumber())) {
    // NOTE(review): targetNumber may already have been replaced by the "target_type" lookup
    // that precedes these statements - confirm a second lookup in "target_number" is intended.
    String value = DictUtil.getDictName("target_number", file.getTargetNumber());
    file.setTargetNumber(StringUtils.hasText(value) ? value : file.getTargetNumber());
}
if (StringUtils.hasText(file.getMissileNumber())) {
    String value = DictUtil.getDictName("missile_number", file.getMissileNumber());
    file.setMissileNumber(StringUtils.hasText(value) ? value : file.getMissileNumber());
}
file.setLabels(null);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }

@ -26,6 +26,8 @@ import javax.swing.text.DefaultStyledDocument;
import javax.swing.text.rtf.RTFEditorKit; import javax.swing.text.rtf.RTFEditorKit;
import java.io.*; import java.io.*;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.function.Consumer; import java.util.function.Consumer;
@ -36,6 +38,7 @@ import java.util.function.Consumer;
* @since 2021/6/29 * @since 2021/6/29
*/ */
public class PoiFileReadUtil { public class PoiFileReadUtil {
// File extensions that isReadilyFile accepts by name alone, without inspecting the file's bytes.
// Entries are lower-case and are compared with an exact (case-sensitive) match.
private final static String[] readilyFileTypes = {"doc", "docx", "xls", "xlsx", "ppt", "pptx", "rtf", "pdf", "txt"};
/** /**
* 解析文件文本内容 * 解析文件文本内容
@ -67,11 +70,28 @@ public class PoiFileReadUtil {
case "txt": case "txt":
return readContentByTxt(file); return readContentByTxt(file);
default: default:
return ""; if (isText(file)) {
return readContentByTxt(file);
}
} }
} else { } else {
throw new Exception("文件不存在"); throw new FileNotFoundException("文件不存在");
}
return null;
}
/**
* 判断是否为可读文件
*
* @param file 文件
* @return
*/
public static boolean isReadilyFile(File file) {
String fileSuffix = FileUtil.extName(file);
if (Arrays.asList(readilyFileTypes).contains(fileSuffix)) {
return true;
} }
return isText(file);
} }
/** /**
@ -81,7 +101,7 @@ public class PoiFileReadUtil {
* @return 文件内容 * @return 文件内容
*/ */
private static String readContentByDoc(File file) throws IOException { private static String readContentByDoc(File file) throws IOException {
InputStream fis = new FileInputStream(file); InputStream fis = Files.newInputStream(file.toPath());
WordExtractor wordExtractor = new WordExtractor(fis);//使用HWPF组件中WordExtractor类从Word文档中提取文本或段落 WordExtractor wordExtractor = new WordExtractor(fis);//使用HWPF组件中WordExtractor类从Word文档中提取文本或段落
StringBuilder result = new StringBuilder(); StringBuilder result = new StringBuilder();
for (String words : wordExtractor.getParagraphText()) {//获取段落内容 for (String words : wordExtractor.getParagraphText()) {//获取段落内容
@ -115,7 +135,7 @@ public class PoiFileReadUtil {
* @return 文件内容 * @return 文件内容
*/ */
private static String readContentByXls(File file) throws IOException { private static String readContentByXls(File file) throws IOException {
InputStream is = new FileInputStream(file); InputStream is = Files.newInputStream(file.toPath());
HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(is)); HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(is));
ExcelExtractor extractor = new ExcelExtractor(wb); ExcelExtractor extractor = new ExcelExtractor(wb);
extractor.setFormulasNotResults(false); extractor.setFormulasNotResults(false);
@ -133,7 +153,7 @@ public class PoiFileReadUtil {
* @return 文件内容 * @return 文件内容
*/ */
private static String readContentByXlsx(File file) throws IOException { private static String readContentByXlsx(File file) throws IOException {
InputStream is = new FileInputStream(file); InputStream is = Files.newInputStream(file.toPath());
XSSFExcelExtractor extractor = new XSSFExcelExtractor(new XSSFWorkbook(is)); XSSFExcelExtractor extractor = new XSSFExcelExtractor(new XSSFWorkbook(is));
extractor.setIncludeSheetNames(false); extractor.setIncludeSheetNames(false);
String result = extractor.getText(); String result = extractor.getText();
@ -174,12 +194,12 @@ public class PoiFileReadUtil {
private static String readContentByRtf(File file) throws IOException, BadLocationException { private static String readContentByRtf(File file) throws IOException, BadLocationException {
DefaultStyledDocument styledDoc = new DefaultStyledDocument(); DefaultStyledDocument styledDoc = new DefaultStyledDocument();
// 创建文件输入流 // 创建文件输入流
InputStream is = new FileInputStream(file); InputStream is = Files.newInputStream(file.toPath());
new RTFEditorKit().read(is, styledDoc, 0); new RTFEditorKit().read(is, styledDoc, 0);
is.close(); is.close();
byte[] buff = styledDoc.getText(0, styledDoc.getLength()).getBytes(StandardCharsets.ISO_8859_1); byte[] buff = styledDoc.getText(0, styledDoc.getLength()).getBytes(StandardCharsets.ISO_8859_1);
return new String(buff, get_charset(buff)); return new String(buff, getCharset(buff));
} }
/** /**
@ -190,7 +210,7 @@ public class PoiFileReadUtil {
*/ */
private static String readContentByPpt(File file) throws IOException { private static String readContentByPpt(File file) throws IOException {
// word 2003: 图片不会被读取 // word 2003: 图片不会被读取
InputStream fis = new FileInputStream(file); InputStream fis = Files.newInputStream(file.toPath());
PowerPointExtractor ex = new PowerPointExtractor(fis); PowerPointExtractor ex = new PowerPointExtractor(fis);
String text = ex.getText().replace("\n", ""); String text = ex.getText().replace("\n", "");
ex.close(); ex.close();
@ -205,7 +225,7 @@ public class PoiFileReadUtil {
* @return 文件内容 * @return 文件内容
*/ */
private static String readContentByPptx(File file) throws IOException { private static String readContentByPptx(File file) throws IOException {
InputStream is = new FileInputStream(file); InputStream is = Files.newInputStream(file.toPath());
XMLSlideShow slide = new XMLSlideShow(is); XMLSlideShow slide = new XMLSlideShow(is);
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(slide); XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(slide);
extractor.close(); extractor.close();
@ -221,7 +241,30 @@ public class PoiFileReadUtil {
*/ */
private static String readContentByTxt(File file) throws IOException { private static String readContentByTxt(File file) throws IOException {
FileInputStream fis = new FileInputStream(file); FileInputStream fis = new FileInputStream(file);
return getCharset(fis); return getFileText(fis);
}
/**
* 判断文件是否为文本格式的文件
*
* @param file
* @return
*/
public static boolean isText(File file) {
boolean isText = true;
try (FileInputStream fin = new FileInputStream(file)) {
long len = file.length();
for (int j = 0; j < (int) len; j++) {
int t = fin.read();
if (t < 32 && t != 9 && t != 10 && t != 13) {
isText = false;
break;
}
}
} catch (Exception e) {
e.printStackTrace();
}
return isText;
} }
/** /**
@ -231,7 +274,7 @@ public class PoiFileReadUtil {
* @return - * @return -
* @throws IOException - * @throws IOException -
*/ */
public static String getCharset(InputStream is) throws IOException { public static String getFileText(InputStream is) throws IOException {
BufferedInputStream bis = new BufferedInputStream(is); BufferedInputStream bis = new BufferedInputStream(is);
int len; int len;
@ -251,7 +294,7 @@ public class PoiFileReadUtil {
bis.close(); bis.close();
is.close(); is.close();
return new String(buffer, get_charset(buffer)); return new String(buffer, getCharset(buffer));
} }
/** /**
@ -261,7 +304,7 @@ public class PoiFileReadUtil {
* @return - * @return -
* @throws IOException - * @throws IOException -
*/ */
private static String get_charset(byte[] file) throws IOException { private static String getCharset(byte[] file) throws IOException {
String charset = "GBK"; String charset = "GBK";
byte[] first3Bytes = new byte[3]; byte[] first3Bytes = new byte[3];
InputStream bis = null; InputStream bis = null;
@ -330,7 +373,7 @@ public class PoiFileReadUtil {
public static void convertToUTF8(MultipartFile file, Consumer<? super MultipartFile> action) throws IOException { public static void convertToUTF8(MultipartFile file, Consumer<? super MultipartFile> action) throws IOException {
File temp = new File(file.getName()); File temp = new File(file.getName());
String charset = get_charset(file.getBytes()); String charset = getCharset(file.getBytes());
if ("UTF-8".equalsIgnoreCase(charset)) { if ("UTF-8".equalsIgnoreCase(charset)) {
action.accept(file); action.accept(file);
} }
@ -351,4 +394,5 @@ public class PoiFileReadUtil {
action.accept(toMultipartFile); action.accept(toMultipartFile);
temp.deleteOnExit(); temp.deleteOnExit();
} }
} }