diff --git a/src/main/java/com/docus/sw/fenpan/FenpanService.java b/src/main/java/com/docus/sw/fenpan/FenpanService.java index 8cf0ba7..f06bb0b 100644 --- a/src/main/java/com/docus/sw/fenpan/FenpanService.java +++ b/src/main/java/com/docus/sw/fenpan/FenpanService.java @@ -6,6 +6,7 @@ import com.docus.sw.word.GetPicsDocx; import com.docus.sw.word.MyFileUtil; import com.docus.sw.word.PdfBoxUtils; import com.docus.sw.word.ReadImgDoc; +import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.apache.commons.imaging.ImageInfo; import org.apache.commons.imaging.ImageReadException; @@ -19,6 +20,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.util.*; +import java.util.concurrent.*; @Slf4j public class FenpanService { @@ -78,6 +80,10 @@ public class FenpanService { } catch (IOException e) { log.error("读取配置文件失败!", e); throw new RuntimeException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); } } @@ -90,12 +96,12 @@ public class FenpanService { new FenpanService().fenpan(saveUrl, readUrl); } - public List getDocument(Pieces piece){ + public List getDocument(Pieces piece) { return null; } - public Map readFile(String readUrl) { + public Map readFile(String readUrl) throws ExecutionException, InterruptedException { //读取文件夹。 List pieces = new ArrayList<>(); Map rollMap = new LinkedHashMap<>(); @@ -108,35 +114,35 @@ public class FenpanService { if (piece.getFileTypeEnum() == FileTypeEnum.DOC) { //从word 直接提取图片 //提取图片为document ,然后用于后面判断 - String teamName = "temp/"+UUID.randomUUID(); + String teamName = "temp/" + UUID.randomUUID(); File file = new File(teamName); - if(!file.exists()){ + if (!file.exists()) { file.mkdirs(); } List documentList = new ArrayList<>(); - ReadImgDoc.readPicture(piece.getAbsolutePath(),file.getAbsolutePath()); + ReadImgDoc.readPicture(piece.getAbsolutePath(), file.getAbsolutePath()); File[] files = file.listFiles(); - for(File pdfImg:files){ + for (File pdfImg : files) { getDocumentList(documentList, pdfImg); } piece.put(documentList); //删除对应的temp 文件 FileUtils.delete(file); - } else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) { + } else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) { //从word 直接提取图片 //提取图片为document ,然后用于后面判断 - String teamName = "temp/"+UUID.randomUUID(); + String teamName = "temp/" + UUID.randomUUID(); File file = new File(teamName); - if(!file.exists()){ + if (!file.exists()) { file.mkdirs(); } List documentList = new ArrayList<>(); - GetPicsDocx.getPics(piece.getAbsolutePath(),file.getAbsolutePath()); + GetPicsDocx.getPics(piece.getAbsolutePath(), file.getAbsolutePath()); File[] files = file.listFiles(); - for(File pdfImg:files){ + for (File pdfImg : files) { getDocumentList(documentList, pdfImg); } @@ -146,23 +152,22 @@ public class FenpanService { } else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) { // 从pdf 提取图片, //提取图片为document ,然后用于后面判断 - String teamName = "temp/"+UUID.randomUUID(); + String teamName = "temp/" + UUID.randomUUID(); File file = new File(teamName); - if(!file.exists()){ + if (!file.exists()) { file.mkdirs(); } List documentList = new ArrayList<>(); try { - PdfBoxUtils.pdf2image(piece.getAbsolutePath(),file.getAbsolutePath()); + PdfBoxUtils.pdf2image(piece.getAbsolutePath(), file.getAbsolutePath()); File[] files = file.listFiles(); -// System.out.println("件名:"+file.getName()); - for(File pdfImg:files){ -// System.out.println("图片名"+pdfImg.getName()); + + for (File pdfImg : files) { getDocumentList(documentList, pdfImg); } } catch (IOException e) { - log.error(e.getMessage(),e); + log.error(e.getMessage(), e); } piece.put(documentList); @@ -173,12 +178,17 @@ public class FenpanService { List documentList = new ArrayList<>(); File sourceFile = new File(piece.getAbsolutePath()); File[] files = sourceFile.listFiles(); - for (File file : files) { - getDocumentList(documentList, file); + for (File pdfImg : files) { + getDocumentList(documentList, pdfImg); } piece.put(documentList); } + } + + + for (Pieces piece : pieces) { + //根据文件类型 //填充卷 File file = new File(piece.getAbsolutePath()); File parentFile = file.getParentFile(); @@ -237,9 +247,7 @@ public class FenpanService { private static void getDocumentList(List documentList, File file) { //非图片模式,跳过。 - if (!(file.getName().endsWith(".jpg") || file.getName().endsWith(".png") - || file.getName().endsWith(".jpeg") || file.getName().endsWith(".tif") - || file.getName().endsWith(".tiff")) || file.getName().endsWith(".jp2") || file.getName().endsWith(".jpm")|| file.getName().endsWith(".gif")) { + if (!(file.getName().endsWith(".jpg") || file.getName().endsWith(".png") || file.getName().endsWith(".jpeg") || file.getName().endsWith(".tif") || file.getName().endsWith(".tiff")) || file.getName().endsWith(".jp2") || file.getName().endsWith(".jpm") || file.getName().endsWith(".gif")) { return; } @@ -277,6 +285,47 @@ public class FenpanService { } + private static Document getDocumentList(File file) { + //非图片模式,跳过。 + if (!(file.getName().endsWith(".jpg") || file.getName().endsWith(".png") || file.getName().endsWith(".jpeg") || file.getName().endsWith(".tif") || file.getName().endsWith(".tiff")) || file.getName().endsWith(".jp2") || file.getName().endsWith(".jpm") || file.getName().endsWith(".gif")) { + return null; + } + + if (file.getName().endsWith(".jp2") || file.getName().endsWith(".jpm")) { + // 读取 JPEG 2000 图像文件 + + try { + BufferedImage image = ImageIO.read(file); + int height = image.getHeight(); + int width = image.getWidth(); + Document document = new Document(width, height, 300); + return document; + } catch (IOException e) { + throw new RuntimeException(e); + } + + } else { + try { + ImageInfo imageInfo = Imaging.getImageInfo(file); + int height = imageInfo.getHeight(); + int width = imageInfo.getWidth(); + int physicalHeightDpi = imageInfo.getPhysicalHeightDpi(); + Document document = new Document(width, height, physicalHeightDpi); + return document; + } catch (IOException e) { + FileUtils.delete(file); + throw new RuntimeException("非图片格式", e); + } catch (ImageReadException e) { + FileUtils.delete(file); + throw new RuntimeException(e); + } catch (IllegalArgumentException e) { + FileUtils.delete(file); + } + } + return null; + } + + private void findAllDir(String absolutePath, List allDirectory) { File sourceFile = new File(absolutePath); File[] files = sourceFile.listFiles(); @@ -290,15 +339,13 @@ public class FenpanService { if (o.getName().endsWith(".pdf")) { Pieces pieces = new Pieces(FileTypeEnum.PDF, o.getAbsolutePath(), o.getName()); allDirectory.add(pieces); - } else if (o.getName().endsWith(".docx") ) { + } else if (o.getName().endsWith(".docx")) { Pieces pieces = new Pieces(FileTypeEnum.DOCX, o.getAbsolutePath(), o.getName()); allDirectory.add(pieces); - } else if ( o.getName().endsWith(".doc")) { + } else if (o.getName().endsWith(".doc")) { Pieces pieces = new Pieces(FileTypeEnum.DOC, o.getAbsolutePath(), o.getName()); allDirectory.add(pieces); - } else if (o.getName().endsWith(".jpg") || o.getName().endsWith(".png") - || o.getName().endsWith(".jpeg") || o.getName().endsWith(".tif") - || o.getName().endsWith(".tiff")) { + } else if (o.getName().endsWith(".jpg") || o.getName().endsWith(".png") || o.getName().endsWith(".jpeg") || o.getName().endsWith(".tif") || o.getName().endsWith(".tiff")) { Pieces pieces = new Pieces(FileTypeEnum.JPG, o.getParentFile().getAbsolutePath(), o.getParentFile().getName()); allDirectory.add(pieces); break; diff --git a/src/main/java/com/docus/sw/souyin/SuoyinService.java b/src/main/java/com/docus/sw/souyin/SuoyinService.java index e063e22..1431213 100644 --- a/src/main/java/com/docus/sw/souyin/SuoyinService.java +++ b/src/main/java/com/docus/sw/souyin/SuoyinService.java @@ -23,10 +23,7 @@ import java.io.IOException; import java.nio.file.CopyOption; import java.nio.file.Files; import java.nio.file.StandardCopyOption; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; public class SuoyinService { @@ -78,7 +75,8 @@ public class SuoyinService { if(piece.getName().endsWith(".doc")){ Pieces pieces = new Pieces(FileTypeEnum.DOC, piece.getAbsolutePath(), piece.getName()); - File temp = new File("temp"); + String teamName = "tempIndex/"+ UUID.randomUUID(); + File temp = new File(teamName); if(!temp.exists()){ temp.mkdirs(); } @@ -93,7 +91,8 @@ public class SuoyinService { }else if(piece.getName().endsWith(".docx")){ Pieces pieces = new Pieces(FileTypeEnum.DOCX, piece.getAbsolutePath(), piece.getName()); - File temp = new File("temp"); + String teamName = "tempIndex/"+ UUID.randomUUID(); + File temp = new File(teamName); if(!temp.exists()){ temp.mkdirs(); } @@ -107,7 +106,8 @@ public class SuoyinService { }else if(piece.getName().endsWith(".pdf")){ Pieces pieces = new Pieces(FileTypeEnum.DOC, piece.getAbsolutePath(), piece.getName()); - File temp = new File("temp"); + String teamName = "tempIndex/"+ UUID.randomUUID(); + File temp = new File(teamName); if(!temp.exists()){ temp.mkdirs(); } @@ -146,7 +146,7 @@ public class SuoyinService { } - + FileUtils.delete(new File("tempIndex")); //生成索引目录 for(IndexPlate indexPlate : map.values()){ diff --git a/src/main/java/com/docus/sw/word/PdfBoxUtils.java b/src/main/java/com/docus/sw/word/PdfBoxUtils.java index 4ab758c..638da8b 100644 --- a/src/main/java/com/docus/sw/word/PdfBoxUtils.java +++ b/src/main/java/com/docus/sw/word/PdfBoxUtils.java @@ -143,12 +143,20 @@ public class PdfBoxUtils { */ public static void pdf2image(String src, String des) throws IOException { File pdfFile = new File(src); - PDDocument load = PDDocument.load(pdfFile, MemoryUsageSetting.setupMixed(1024 * 1024)); - DefPdfToImageEngine imageEngine = new DefPdfToImageEngine(pdfFile.getName().substring(0, pdfFile.getName().lastIndexOf(".")), des); - for (PDPage page : load.getPages()) { - imageEngine.processPage(page); + PDDocument load = null; + try{ + load = PDDocument.load(pdfFile, MemoryUsageSetting.setupMixed(512 * 1024)); + DefPdfToImageEngine imageEngine = new DefPdfToImageEngine(pdfFile.getName().substring(0, pdfFile.getName().lastIndexOf(".")), des); + for (PDPage page : load.getPages()) { + imageEngine.processPage(page); + } + }finally { + if(load!=null){ + load.close(); + } } - load.close(); + + }