|
|
|
@ -109,83 +109,120 @@ public class FenpanService {
|
|
|
|
|
this.findAllDir(readUrl, pieces);
|
|
|
|
|
//写入文件
|
|
|
|
|
|
|
|
|
|
List<Future<String>> futures2 = new ArrayList<>();
|
|
|
|
|
ExecutorService executorService2 = Executors.newFixedThreadPool(2);
|
|
|
|
|
for (Pieces piece : pieces) {
|
|
|
|
|
//根据文件类型
|
|
|
|
|
if (piece.getFileTypeEnum() == FileTypeEnum.DOC) {
|
|
|
|
|
//从word 直接提取图片
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
String teamName = "temp/" + UUID.randomUUID();
|
|
|
|
|
File file = new File(teamName);
|
|
|
|
|
if (!file.exists()) {
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath());
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
}
|
|
|
|
|
Future<String> aa = executorService2.submit(() -> {
|
|
|
|
|
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
FileUtils.delete(file);
|
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) {
|
|
|
|
|
//从word 直接提取图片
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
String teamName = "temp/" + UUID.randomUUID();
|
|
|
|
|
File file = new File(teamName);
|
|
|
|
|
if (!file.exists()) {
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
}
|
|
|
|
|
//根据文件类型
|
|
|
|
|
if (piece.getFileTypeEnum() == FileTypeEnum.DOC) {
|
|
|
|
|
//从word 直接提取图片
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
String teamName = "temp/" + UUID.randomUUID();
|
|
|
|
|
File file = new File(teamName);
|
|
|
|
|
if (!file.exists()) {
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath());
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
}
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath());
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
FileUtils.delete(file);
|
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
|
|
|
|
|
// 从pdf 提取图片,
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
String teamName = "temp/" + UUID.randomUUID();
|
|
|
|
|
File file = new File(teamName);
|
|
|
|
|
if (!file.exists()) {
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
}
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
FileUtils.delete(file);
|
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) {
|
|
|
|
|
//从word 直接提取图片
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
String teamName = "temp/" + UUID.randomUUID();
|
|
|
|
|
File file = new File(teamName);
|
|
|
|
|
if (!file.exists()) {
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
try {
|
|
|
|
|
PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath());
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath());
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
log.error(e.getMessage(), e);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
FileUtils.delete(file);
|
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
|
|
|
|
|
// 从pdf 提取图片,
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
String teamName = "temp/" + UUID.randomUUID();
|
|
|
|
|
File file = new File(teamName);
|
|
|
|
|
if (!file.exists()) {
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
try {
|
|
|
|
|
PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath());
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
log.error(e.getMessage(), e);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
// FileUtils.delete(file);
|
|
|
|
|
} else {
|
|
|
|
|
//是图片,直接从图片提取
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
File sourceFile = new File(piece.getAbsolutePath());
|
|
|
|
|
File[] files = sourceFile.listFiles();
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
} else {
|
|
|
|
|
//是图片,直接从图片提取
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
File sourceFile = new File(piece.getAbsolutePath());
|
|
|
|
|
File[] files = sourceFile.listFiles();
|
|
|
|
|
ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
|
|
|
|
|
|
|
|
|
|
List<Future<Document>> futures = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
// System.out.println("件名:"+file.getName());
|
|
|
|
|
for (File pdfImg : files) {
|
|
|
|
|
// System.out.println("图片名"+pdfImg.getName());
|
|
|
|
|
Future<Document> future = executorService.submit(() -> getDocumentList(pdfImg));
|
|
|
|
|
futures.add(future);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (Future<Document> future : futures) {
|
|
|
|
|
Document image = null;
|
|
|
|
|
try {
|
|
|
|
|
image = future.get();
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
} catch (ExecutionException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
}
|
|
|
|
|
if (image != null) {
|
|
|
|
|
documentList.add(image);
|
|
|
|
|
}
|
|
|
|
|
// 处理图片
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
executorService.shutdown();
|
|
|
|
|
}
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return "OK";
|
|
|
|
|
});
|
|
|
|
|
futures2.add(aa);
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (Future<String> f : futures2) {
|
|
|
|
|
f.get();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
executorService2.shutdown();
|
|
|
|
|
|
|
|
|
|
for (Pieces piece : pieces) {
|
|
|
|
|
//根据文件类型
|
|
|
|
|