diff --git a/pom.xml b/pom.xml index 5852ff2..c6ec445 100644 --- a/pom.xml +++ b/pom.xml @@ -60,6 +60,32 @@ mybatis-plus-generator ${mybatisPlus.version} + + + + + org.ofdrw + ofdrw-layout + 1.20.2 + + + + org.apache.pdfbox + pdfbox + 2.0.27 + + + org.apache.pdfbox + jbig2-imageio + 3.0.3 + + + + net.sourceforge.tess4j + tess4j + 5.10.0 + + mysql mysql-connector-java
diff --git a/src/main/java/com/example/duplicate/service/impl/MedicalAdviceServiceImpl.java b/src/main/java/com/example/duplicate/service/impl/MedicalAdviceServiceImpl.java index c52e0eb..f807b9d 100644 --- a/src/main/java/com/example/duplicate/service/impl/MedicalAdviceServiceImpl.java +++ b/src/main/java/com/example/duplicate/service/impl/MedicalAdviceServiceImpl.java @@ -88,12 +88,14 @@ public class MedicalAdviceServiceImpl implements MedicalAdviceService { GenerateQueue(tasks, printParam, 5, collectId); log.info("轮询增加任务成功,id为:" + tasks.getId()); } - }else { - //存在任务将任务表状态改为0为开始 - archiveOtherExtMapper.updateStatic(tasks.getId(), collectId); - //存放队列消息 - GenerateQueue(tasks, printParam, 5, collectId); } + //定时增加的任务理论上不存在有任务 +// else { +// //存在任务将任务表状态改为0为开始 +// archiveOtherExtMapper.updateStatic(tasks.getId(), collectId); +// //存放队列消息 +// GenerateQueue(tasks, printParam, 5, collectId); +// } log.info("轮询增加任务成功,id为:" + tasks.getId()); } catch (Exception e) { log.error("轮询增加任务失败,id为:" + tasks.getId(), e);
diff --git a/src/main/java/com/example/utils/PdfToDoubleLayerOFD.java b/src/main/java/com/example/utils/PdfToDoubleLayerOFD.java
new file mode 100644
index 0000000..64412ff
--- /dev/null
+++ b/src/main/java/com/example/utils/PdfToDoubleLayerOFD.java
@@ -0,0 +1,184 @@
+package com.example.utils;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.ofdrw.layout.OFDDoc;
+import org.ofdrw.layout.PageLayout;
+import org.ofdrw.layout.VirtualPage;
+import org.ofdrw.layout.element.*;
+import org.ofdrw.layout.element.canvas.Canvas;
+import org.ofdrw.core.basicStructure.pageObj.Page;
+import net.sourceforge.tess4j.ITessAPI;
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.Word;
+import org.ofdrw.layout.element.canvas.Drawer;
+import org.ofdrw.layout.element.canvas.FontSetting;
+
+import javax.imageio.ImageIO;
+import java.awt.Rectangle;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Converts a PDF into a "double-layer" OFD: each page is rasterised into a background
+ * image and the words recognised by OCR are drawn over it as a text layer.
+ *
+ * <p>OFD geometry is expressed in millimetres, so pixel values taken from the rendered
+ * image and from the OCR bounding boxes are scaled by {@code MM_PER_PIXEL} before use.
+ *
+ * @ClassName PdfToDoubleLayerOFD
+ * @Description PDF to double-layer OFD converter
+ * @Author linjj
+ * @Date 2025/8/4 14:53
+ * @Version 1.0
+ */
+public class PdfToDoubleLayerOFD {
+
+    /** Resolution used to rasterise each PDF page. */
+    private static final float RENDER_DPI = 150f;
+
+    /** Millimetres covered by one rendered pixel (25.4 mm per inch). */
+    private static final double MM_PER_PIXEL = 25.4 / RENDER_DPI;
+
+    /** Lower bound for the text-layer font size, in millimetres. */
+    private static final double MIN_FONT_SIZE_MM = 1.0;
+
+    /**
+     * Runs the conversion.
+     *
+     * @param args optional overrides: {@code [0]} source PDF, {@code [1]} target OFD,
+     *             {@code [2]} Tesseract language-data directory; missing entries fall
+     *             back to the built-in defaults
+     * @throws Exception if the PDF cannot be opened or the OFD cannot be written
+     */
+    public static void main(String[] args) throws Exception {
+        String pdfPath = argOrDefault(args, 0, "E:\\work\\pdf\\1.pdf"); // source PDF
+        String ofdPath = argOrDefault(args, 1, "E:\\work\\pdf\\2.ofd"); // target OFD
+        String tessPath = argOrDefault(args, 2, "E:\\work\\ocr");       // Tesseract language data
+
+        // Initialise OCR.
+        ITesseract tesseract = new Tesseract();
+        tesseract.setDatapath(tessPath);
+        tesseract.setLanguage("chi_sim+eng");
+
+        // The OFD writer may still read the page images until it is closed, so they are
+        // tracked here and only removed after both documents have been closed.
+        List<Path> tempImages = new ArrayList<>();
+        try (PDDocument pdfDoc = PDDocument.load(new File(pdfPath))) {
+            PDFRenderer renderer = new PDFRenderer(pdfDoc);
+            try (OFDDoc ofdDoc = new OFDDoc(Paths.get(ofdPath))) {
+                for (int i = 0; i < pdfDoc.getNumberOfPages(); i++) {
+                    try {
+                        appendPage(ofdDoc, renderer, tesseract, i, tempImages);
+                    } catch (Exception e) {
+                        // Best effort: one broken page must not abort the remaining pages.
+                        System.err.println("处理第 " + (i + 1) + " 页时出错: " + e.getMessage());
+                        e.printStackTrace();
+                    }
+                }
+            }
+        } finally {
+            deleteTempImages(tempImages);
+        }
+        System.out.println("✅ 双层 OFD 完成:" + ofdPath);
+    }
+
+    /** Returns {@code args[index]} when it is present, otherwise {@code fallback}. */
+    private static String argOrDefault(String[] args, int index, String fallback) {
+        return args != null && args.length > index ? args[index] : fallback;
+    }
+
+    /**
+     * Renders one PDF page, runs OCR on it and appends the result as one OFD page.
+     *
+     * @param ofdDoc     target document
+     * @param renderer   renderer bound to the source PDF
+     * @param tesseract  configured OCR engine
+     * @param pageIndex  zero-based page index
+     * @param tempImages receives the temporary image file created for this page
+     * @throws IOException if rendering the page or writing its image fails
+     */
+    private static void appendPage(OFDDoc ofdDoc, PDFRenderer renderer, ITesseract tesseract,
+                                   int pageIndex, List<Path> tempImages) throws IOException {
+        BufferedImage image = renderer.renderImageWithDPI(pageIndex, RENDER_DPI);
+        if (image == null) {
+            System.err.println("警告: 第 " + (pageIndex + 1) + " 页渲染失败");
+            return;
+        }
+        double pageWidthMm = image.getWidth() * MM_PER_PIXEL;
+        double pageHeightMm = image.getHeight() * MM_PER_PIXEL;
+
+        Path tempImage = Files.createTempFile("page", ".png");
+        tempImages.add(tempImage);
+        ImageIO.write(image, "png", tempImage.toFile());
+
+        // OCR runs eagerly so the drawer below keeps only the word list alive, not the
+        // bitmap. RIL_WORD requests word-level boxes.
+        List<Word> words = tesseract.getWords(image, ITessAPI.TessPageIteratorLevel.RIL_WORD);
+        if (words.isEmpty()) {
+            System.err.println("警告: 第 " + (pageIndex + 1) + " 页OCR未识别到文字");
+        }
+
+        // Image layer: the rendered page stretched over the whole page area.
+        Img background = new Img(tempImage);
+        background.setPosition(Position.Absolute);
+        background.setX(0d);
+        background.setY(0d);
+        background.setWidth(pageWidthMm);
+        background.setHeight(pageHeightMm);
+
+        // Text layer: a canvas of the same size placed exactly over the image.
+        Canvas textLayer = new Canvas(pageWidthMm, pageHeightMm);
+        textLayer.setPosition(Position.Absolute);
+        textLayer.setX(0d);
+        textLayer.setY(0d);
+        textLayer.setDrawer(ctx -> {
+            try {
+                for (Word word : words) {
+                    String text = word.getText();
+                    if (text == null || text.trim().isEmpty()) {
+                        continue;
+                    }
+                    Rectangle box = word.getBoundingBox();
+                    // Both OCR boxes and the canvas use a top-left origin, so no vertical
+                    // flip is applied; the bottom edge of the box approximates the baseline.
+                    double x = box.getX() * MM_PER_PIXEL;
+                    double baselineY = (box.getY() + box.getHeight()) * MM_PER_PIXEL;
+                    double fontSizeMm = Math.max(box.getHeight() * MM_PER_PIXEL, MIN_FONT_SIZE_MM);
+                    ctx.setFont(FontSetting.getInstance(fontSizeMm));
+                    ctx.setFillColor(0, 0, 0);
+                    ctx.fillText(text, x, baselineY);
+                }
+            } catch (Exception e) {
+                // Best effort: a text-layer failure must not stop the image layer
+                // from being written.
+                System.err.println("OCR处理错误: " + e.getMessage());
+                e.printStackTrace();
+            }
+        });
+
+        // One virtual page per PDF page keeps every page at its own size.
+        VirtualPage virtualPage = new VirtualPage(new PageLayout(pageWidthMm, pageHeightMm));
+        virtualPage.add(background);
+        virtualPage.add(textLayer);
+        ofdDoc.addVPage(virtualPage);
+    }
+
+    /** Deletes the temporary page images, reporting (not throwing) any failure. */
+    private static void deleteTempImages(List<Path> tempImages) {
+        for (Path tempImage : tempImages) {
+            try {
+                Files.deleteIfExists(tempImage);
+            } catch (IOException e) {
+                System.err.println("临时文件删除失败: " + tempImage + " (" + e.getMessage() + ")");
+            }
+        }
+    }
+}
diff --git a/src/main/resources/mapper/ArchiveMasterMapper.xml b/src/main/resources/mapper/ArchiveMasterMapper.xml index 31f1051..59ad21a 100644 --- a/src/main/resources/mapper/ArchiveMasterMapper.xml +++ b/src/main/resources/mapper/ArchiveMasterMapper.xml @@ -6,8 +6,8 @@