From 20c19cd6a6f364035d33e313d1797c74a6bfb6a9 Mon Sep 17 00:00:00 2001
From: zhanghai <120228220@qq.com>
Date: Mon, 27 Nov 2023 18:17:07 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=88=A0=E9=99=A4=E6=96=87?=
 =?UTF-8?q?=E4=BB=B6=E9=94=99=E8=AF=AF=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../com/docus/sw/fenpan/FenpanService.java    | 159 +++++++++++-------
 .../java/com/docus/sw/word/PdfBoxUtils.java   |   2 +-
 2 files changed, 99 insertions(+), 62 deletions(-)
diff --git a/src/main/java/com/docus/sw/fenpan/FenpanService.java b/src/main/java/com/docus/sw/fenpan/FenpanService.java
index f06bb0b..f036a24 100644
--- a/src/main/java/com/docus/sw/fenpan/FenpanService.java
+++ b/src/main/java/com/docus/sw/fenpan/FenpanService.java
@@ -109,83 +109,120 @@ public class FenpanService {
         this.findAllDir(readUrl, pieces);
         //写入文件
 
+        List<Future<String>> futures2 = new ArrayList<>();
+        ExecutorService executorService2 = Executors.newFixedThreadPool(2);
         for (Pieces piece : pieces) {
-            //根据文件类型
-            if (piece.getFileTypeEnum() == FileTypeEnum.DOC) {
-                //从word 直接提取图片
-                //提取图片为document ，然后用于后面判断
-                String teamName = "temp/" + UUID.randomUUID();
-                File file = new File(teamName);
-                if (!file.exists()) {
-                    file.mkdirs();
-                }
 
-                List<Document> documentList = new ArrayList<>();
-                ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath());
-                File[] files = file.listFiles();
-                for (File pdfImg : files) {
-                    getDocumentList(documentList, pdfImg);
-                }
+            Future<String> pieceFuture = executorService2.submit(() -> {
 
-                piece.put(documentList);
-                //删除对应的temp 文件
-                FileUtils.delete(file);
-            } else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) {
-                //从word 直接提取图片
-                //提取图片为document ，然后用于后面判断
-                String teamName = "temp/" + UUID.randomUUID();
-                File file = new File(teamName);
-                if (!file.exists()) {
-                    file.mkdirs();
-                }
+                // Dispatch on the piece's file type
+                if (piece.getFileTypeEnum() == FileTypeEnum.DOC) {
+                    // Extract the embedded images straight from the Word file
+                    // and turn each one into a Document for the later checks
+                    String teamName = "temp/" + UUID.randomUUID();
+                    File file = new File(teamName);
+                    if (!file.exists()) {
+                        file.mkdirs();
+                    }
 
-                List<Document> documentList = new ArrayList<>();
-                GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath());
-                File[] files = file.listFiles();
-                for (File pdfImg : files) {
-                    getDocumentList(documentList, pdfImg);
-                }
+                    List<Document> documentList = new ArrayList<>();
+                    ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath());
+                    File[] files = file.listFiles();
+                    for (File pdfImg : files) {
+                        getDocumentList(documentList, pdfImg);
+                    }
 
-                piece.put(documentList);
-                //删除对应的temp 文件
-                FileUtils.delete(file);
-            } else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
-                // 从pdf 提取图片,
-                //提取图片为document ，然后用于后面判断
-                String teamName = "temp/" + UUID.randomUUID();
-                File file = new File(teamName);
-                if (!file.exists()) {
-                    file.mkdirs();
-                }
+                    piece.put(documentList);
+                    // Remove the temp directory created for this piece
+                    FileUtils.delete(file);
+                } else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) {
+                    // Extract the embedded images straight from the Word file
+                    // and turn each one into a Document for the later checks
+                    String teamName = "temp/" + UUID.randomUUID();
+                    File file = new File(teamName);
+                    if (!file.exists()) {
+                        file.mkdirs();
+                    }
 
-                List<Document> documentList = new ArrayList<>();
-                try {
-                    PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath());
+                    List<Document> documentList = new ArrayList<>();
+                    GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath());
                     File[] files = file.listFiles();
-
                     for (File pdfImg : files) {
                         getDocumentList(documentList, pdfImg);
                     }
-                } catch (IOException e) {
-                    log.error(e.getMessage(), e);
-                }
 
-                piece.put(documentList);
-                //删除对应的temp 文件
+                    piece.put(documentList);
+                    // Remove the temp directory created for this piece
+                    FileUtils.delete(file);
+                } else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
+                    // Write the images produced from the PDF into a temp directory
+                    // and turn each one into a Document for the later checks
+                    String teamName = "temp/" + UUID.randomUUID();
+                    File file = new File(teamName);
+                    if (!file.exists()) {
+                        file.mkdirs();
+                    }
+
+                    List<Document> documentList = new ArrayList<>();
+                    try {
+                        PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath());
+                        File[] files = file.listFiles();
+
+                        for (File pdfImg : files) {
+                            getDocumentList(documentList, pdfImg);
+                        }
+                    } catch (IOException e) {
+                        log.error(e.getMessage(), e);
+                    }
+
+                    piece.put(documentList);
+                    // NOTE(review): the delete call below is commented out, so this temp directory is not removed here — confirm cleanup elsewhere
 //                FileUtils.delete(file);
-            } else {
-                //是图片，直接从图片提取
-                List<Document> documentList = new ArrayList<>();
-                File sourceFile = new File(piece.getAbsolutePath());
-                File[] files = sourceFile.listFiles();
-                for (File pdfImg : files) {
-                    getDocumentList(documentList, pdfImg);
+                } else {
+                    // Any other type is handled as a directory of image files: each file is
+                    // processed in parallel and non-null results are kept in submission order.
+                    List<Document> documentList = new ArrayList<>();
+                    File sourceFile = new File(piece.getAbsolutePath());
+                    File[] files = sourceFile.listFiles();
+                    ExecutorService executorService = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
+                    try {
+                        List<Future<Document>> futures = new ArrayList<>();
+                        for (File pdfImg : files) {
+                            futures.add(executorService.submit(() -> getDocumentList(pdfImg)));
+                        }
+
+                        // A failed or interrupted image task propagates out of this Callable and
+                        // reaches the caller through the outer Future.get().
+                        for (Future<Document> future : futures) {
+                            Document image = future.get();
+                            if (image != null) {
+                                documentList.add(image);
+                            }
+                        }
+                    } finally {
+                        // Always release the per-piece pool, even when a task failed.
+                        executorService.shutdown();
+                    }
+
+                    // Attach the results to the piece, as every other branch does; without
+                    // this call the image branch would silently discard its documents.
+                    piece.put(documentList);
                 }
-                piece.put(documentList);
-            }
 
+                return "OK";
+            });
+            futures2.add(pieceFuture);
+
+        }
+
+        try {
+            for (Future<String> f : futures2) {
+                f.get();
+            }
+        } finally {
+            executorService2.shutdown();
         }
 
 
         for (Pieces piece : pieces) {
             //根据文件类型
diff --git a/src/main/java/com/docus/sw/word/PdfBoxUtils.java b/src/main/java/com/docus/sw/word/PdfBoxUtils.java
index 638da8b..3becc43 100644
--- a/src/main/java/com/docus/sw/word/PdfBoxUtils.java
+++ b/src/main/java/com/docus/sw/word/PdfBoxUtils.java
@@ -145,7 +145,7 @@ public class PdfBoxUtils {
         File pdfFile = new File(src);
         PDDocument load = null;
         try{
-            load = PDDocument.load(pdfFile, MemoryUsageSetting.setupMixed(512 * 1024));
+            load = PDDocument.load(pdfFile, MemoryUsageSetting.setupMixed(128 * 1024)); // NOTE(review): presumably caps PDFBox main-memory buffering at 128 KiB (was 512 KiB), spilling to temp files beyond that — confirm
             DefPdfToImageEngine imageEngine = new DefPdfToImageEngine(pdfFile.getName().substring(0, pdfFile.getName().lastIndexOf(".")), des);
             for (PDPage page : load.getPages()) {
                 imageEngine.processPage(page);