修改定时轮询任务查询当前时间前3天当天的数据
parent
d1f42eff87
commit
2a6dddcdb4
@ -0,0 +1,100 @@
|
||||
package com.example.utils;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.ofdrw.layout.OFDDoc;
|
||||
import org.ofdrw.layout.PageLayout;
|
||||
import org.ofdrw.layout.element.*;
|
||||
import org.ofdrw.layout.element.canvas.Canvas;
|
||||
import org.ofdrw.core.basicStructure.pageObj.Page;
|
||||
import net.sourceforge.tess4j.ITesseract;
|
||||
import net.sourceforge.tess4j.Tesseract;
|
||||
import net.sourceforge.tess4j.Word;
|
||||
import org.ofdrw.layout.element.canvas.Drawer;
|
||||
import org.ofdrw.layout.element.canvas.FontSetting;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @ClassName PdfToDoubleLayerOFD
|
||||
* @Description
|
||||
* @Author linjj
|
||||
* @Date 2025/8/4 14:53
|
||||
* @Version 1.0
|
||||
*/
|
||||
public class PdfToDoubleLayerOFD {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
String pdfPath = "E:\\work\\pdf\\1.pdf"; // 源 PDF
|
||||
String ofdPath = "E:\\work\\pdf\\2.ofd"; // 目标 OFD
|
||||
String tessPath = "E:\\work\\ocr"; // Tesseract 语言包目录
|
||||
|
||||
//加载 PDF
|
||||
PDDocument pdfDoc = PDDocument.load(new File(pdfPath));
|
||||
PDFRenderer renderer = new PDFRenderer(pdfDoc);
|
||||
//初始化 OCR
|
||||
ITesseract tesseract = new Tesseract();
|
||||
tesseract.setDatapath(tessPath);
|
||||
tesseract.setLanguage("chi_sim+eng");
|
||||
|
||||
try (OFDDoc ofdDoc = new OFDDoc(Paths.get(ofdPath))) {
|
||||
for (int i = 0; i < pdfDoc.getNumberOfPages(); i++) {
|
||||
try {
|
||||
BufferedImage image = renderer.renderImageWithDPI(i, 150);
|
||||
if (image == null) {
|
||||
System.err.println("警告: 第 " + (i+1) + " 页渲染失败");
|
||||
continue;
|
||||
}
|
||||
|
||||
File tempImage = File.createTempFile("page", ".png");
|
||||
ImageIO.write(image, "png", tempImage);
|
||||
|
||||
PageLayout layout = new PageLayout((double) image.getWidth(), (double) image.getHeight());
|
||||
ofdDoc.setDefaultPageLayout(layout);
|
||||
|
||||
// 添加图像层
|
||||
ofdDoc.add(new Img(tempImage.toPath())
|
||||
.setPosition(Position.Absolute)
|
||||
.setX(0d).setY(0d)
|
||||
.setWidth((double) image.getWidth())
|
||||
.setHeight((double) image.getHeight()));
|
||||
|
||||
// 添加文字层
|
||||
Canvas canvas = new Canvas((double) image.getWidth(), (double) image.getHeight());
|
||||
int finalI = i;
|
||||
canvas.setDrawer(ctx -> {
|
||||
try {
|
||||
List<Word> words = tesseract.getWords(image, ITesseract.RenderedFormat.HOCR.ordinal());
|
||||
if (words.isEmpty()) {
|
||||
System.err.println("警告: 第 " + (finalI +1) + " 页OCR未识别到文字");
|
||||
}
|
||||
for (Word word : words) {
|
||||
double x = word.getBoundingBox().getX();
|
||||
double y = image.getHeight() - word.getBoundingBox().getY() - word.getBoundingBox().getHeight();
|
||||
ctx.setFont(FontSetting.getInstance(8.0));
|
||||
ctx.setFillColor(0, 0, 0);
|
||||
ctx.fillText(word.getText(), x, y);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("OCR处理错误: " + e.getMessage());
|
||||
}
|
||||
});
|
||||
ofdDoc.add(canvas);
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("处理第 " + (i+1) + " 页时出错: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
pdfDoc.close();
|
||||
System.out.println("✅ 双层 OFD 完成:" + ofdPath);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue