From 20c19cd6a6f364035d33e313d1797c74a6bfb6a9 Mon Sep 17 00:00:00 2001 From: zhanghai <120228220@qq.com> Date: Mon, 27 Nov 2023 18:17:07 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=88=A0=E9=99=A4=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E9=94=99=E8=AF=AF=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../com/docus/sw/fenpan/FenpanService.java | 159 +++++++++++------- .../java/com/docus/sw/word/PdfBoxUtils.java | 2 +- 2 files changed, 99 insertions(+), 62 deletions(-) diff --git a/src/main/java/com/docus/sw/fenpan/FenpanService.java b/src/main/java/com/docus/sw/fenpan/FenpanService.java index f06bb0b..f036a24 100644 --- a/src/main/java/com/docus/sw/fenpan/FenpanService.java +++ b/src/main/java/com/docus/sw/fenpan/FenpanService.java @@ -109,83 +109,120 @@ public class FenpanService { this.findAllDir(readUrl, pieces); //写入文件 + List> futures2 = new ArrayList<>(); + ExecutorService executorService2 = Executors.newFixedThreadPool(2); for (Pieces piece : pieces) { - //根据文件类型 - if (piece.getFileTypeEnum() == FileTypeEnum.DOC) { - //从word 直接提取图片 - //提取图片为document ,然后用于后面判断 - String teamName = "temp/" + UUID.randomUUID(); - File file = new File(teamName); - if (!file.exists()) { - file.mkdirs(); - } - List documentList = new ArrayList<>(); - ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath()); - File[] files = file.listFiles(); - for (File pdfImg : files) { - getDocumentList(documentList, pdfImg); - } + Future aa = executorService2.submit(() -> { - piece.put(documentList); - //删除对应的temp 文件 - FileUtils.delete(file); - } else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) { - //从word 直接提取图片 - //提取图片为document ,然后用于后面判断 - String teamName = "temp/" + UUID.randomUUID(); - File file = new File(teamName); - if (!file.exists()) { - file.mkdirs(); - } + //根据文件类型 + if (piece.getFileTypeEnum() == FileTypeEnum.DOC) { + //从word 直接提取图片 + //提取图片为document ,然后用于后面判断 + String teamName = "temp/" + UUID.randomUUID(); + File file = new File(teamName); + if (!file.exists()) { + file.mkdirs(); + } - List documentList = new ArrayList<>(); - GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath()); - File[] files = file.listFiles(); - for (File pdfImg : files) { - getDocumentList(documentList, pdfImg); - } + List documentList = new ArrayList<>(); + ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath()); + File[] files = file.listFiles(); + for (File pdfImg : files) { + getDocumentList(documentList, pdfImg); + } - piece.put(documentList); - //删除对应的temp 文件 - FileUtils.delete(file); - } else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) { - // 从pdf 提取图片, - //提取图片为document ,然后用于后面判断 - String teamName = "temp/" + UUID.randomUUID(); - File file = new File(teamName); - if (!file.exists()) { - file.mkdirs(); - } + piece.put(documentList); + //删除对应的temp 文件 + FileUtils.delete(file); + } else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) { + //从word 直接提取图片 + //提取图片为document ,然后用于后面判断 + String teamName = "temp/" + UUID.randomUUID(); + File file = new File(teamName); + if (!file.exists()) { + file.mkdirs(); + } - List documentList = new ArrayList<>(); - try { - PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath()); + List documentList = new ArrayList<>(); + GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath()); File[] files = file.listFiles(); - for (File pdfImg : files) { getDocumentList(documentList, pdfImg); } - } catch (IOException e) { - log.error(e.getMessage(), e); - } - piece.put(documentList); - //删除对应的temp 文件 + piece.put(documentList); + //删除对应的temp 文件 + FileUtils.delete(file); + } else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) { + // 从pdf 提取图片, + //提取图片为document ,然后用于后面判断 + String teamName = "temp/" + UUID.randomUUID(); + File file = new File(teamName); + if (!file.exists()) { + file.mkdirs(); + } + + List documentList = new ArrayList<>(); + try { + PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath()); + File[] files = file.listFiles(); + + for (File pdfImg : files) { + getDocumentList(documentList, pdfImg); + } + } catch (IOException e) { + log.error(e.getMessage(), e); + } + + piece.put(documentList); + //删除对应的temp 文件 // FileUtils.delete(file); - } else { - //是图片,直接从图片提取 - List documentList = new ArrayList<>(); - File sourceFile = new File(piece.getAbsolutePath()); - File[] files = sourceFile.listFiles(); - for (File pdfImg : files) { - getDocumentList(documentList, pdfImg); + } else { + //是图片,直接从图片提取 + List documentList = new ArrayList<>(); + File sourceFile = new File(piece.getAbsolutePath()); + File[] files = sourceFile.listFiles(); + ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + + List> futures = new ArrayList<>(); + +// System.out.println("件名:"+file.getName()); + for (File pdfImg : files) { +// System.out.println("图片名"+pdfImg.getName()); + Future future = executorService.submit(() -> getDocumentList(pdfImg)); + futures.add(future); + } + + for (Future future : futures) { + Document image = null; + try { + image = future.get(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + if (image != null) { + documentList.add(image); + } + // 处理图片 + } + + executorService.shutdown(); } - piece.put(documentList); - } + return "OK"; + }); + futures2.add(aa); + + } + + for (Future f : futures2) { + f.get(); } + executorService2.shutdown(); for (Pieces piece : pieces) { //根据文件类型 diff --git a/src/main/java/com/docus/sw/word/PdfBoxUtils.java b/src/main/java/com/docus/sw/word/PdfBoxUtils.java index 638da8b..3becc43 100644 --- a/src/main/java/com/docus/sw/word/PdfBoxUtils.java +++ b/src/main/java/com/docus/sw/word/PdfBoxUtils.java @@ -145,7 +145,7 @@ public class PdfBoxUtils { File pdfFile = new File(src); PDDocument load = null; try{ - load = PDDocument.load(pdfFile, MemoryUsageSetting.setupMixed(512 * 1024)); + load = PDDocument.load(pdfFile, MemoryUsageSetting.setupMixed(128 * 1024)); DefPdfToImageEngine imageEngine = new DefPdfToImageEngine(pdfFile.getName().substring(0, pdfFile.getName().lastIndexOf(".")), des); for (PDPage page : load.getPages()) { imageEngine.processPage(page);