编写所有基础工具,提供word util pdf util

master
zhanghai 2 years ago
parent 7628e55f43
commit f5a56078ef

@ -2,6 +2,7 @@ package com.docus.sw.fenpan;
import com.alibaba.excel.util.FileUtils;
import com.docus.sw.Config;
import com.docus.sw.word.GetPicsDocx;
import com.docus.sw.word.PdfBoxUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.imaging.ImageInfo;
@ -102,10 +103,52 @@ public class FenpanService {
for (Pieces piece : pieces) {
//根据文件类型
if (piece.getFileTypeEnum() == FileTypeEnum.WORD) {
if (piece.getFileTypeEnum() == FileTypeEnum.DOC) {
//从word 直接提取图片
//提取图片为document ,然后用于后面判断
File file = new File("temp");
if(!file.exists()){
file.mkdirs();
}
GetPicsDocx.getPics(piece.getAbsolutePath(),file.getAbsolutePath());
List<Document> documentList = new ArrayList<>();
try {
PdfBoxUtils.pdf2image(piece.getAbsolutePath(),file.getAbsolutePath());
File[] files = file.listFiles();
for(File pdfImg:files){
getDocumentList(documentList, pdfImg);
}
} catch (IOException e) {
log.error(e.getMessage(),e);
}
piece.put(documentList);
//删除对应的temp 文件
file.delete();
} else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) {
//从word 直接提取图片
//提取图片为document ,然后用于后面判断
File file = new File("temp");
if(!file.exists()){
file.mkdirs();
}
GetPicsDocx.getPics(piece.getAbsolutePath(),file.getAbsolutePath());
List<Document> documentList = new ArrayList<>();
try {
PdfBoxUtils.pdf2image(piece.getAbsolutePath(),file.getAbsolutePath());
File[] files = file.listFiles();
for(File pdfImg:files){
getDocumentList(documentList, pdfImg);
}
} catch (IOException e) {
log.error(e.getMessage(),e);
}
piece.put(documentList);
//删除对应的temp 文件
file.delete();
} else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
// 从pdf 提取图片,
//提取图片为document ,然后用于后面判断
@ -247,8 +290,11 @@ public class FenpanService {
if (o.getName().endsWith(".pdf")) {
Pieces pieces = new Pieces(FileTypeEnum.PDF, o.getAbsolutePath(), o.getName());
allDirectory.add(pieces);
} else if (o.getName().endsWith(".docx") || o.getName().endsWith(".doc")) {
Pieces pieces = new Pieces(FileTypeEnum.WORD, o.getAbsolutePath(), o.getName());
} else if (o.getName().endsWith(".docx") ) {
Pieces pieces = new Pieces(FileTypeEnum.DOCX, o.getAbsolutePath(), o.getName());
allDirectory.add(pieces);
} else if ( o.getName().endsWith(".doc")) {
Pieces pieces = new Pieces(FileTypeEnum.DOC, o.getAbsolutePath(), o.getName());
allDirectory.add(pieces);
} else if (o.getName().endsWith(".jpg") || o.getName().endsWith(".png")
|| o.getName().endsWith(".jpeg") || o.getName().endsWith(".tif")

@ -1,7 +1,8 @@
package com.docus.sw.fenpan;
/**
 * File categories attached to each scanned file so later processing can
 * branch on the kind of input it is handling.
 */
public enum FileTypeEnum {
// Generic Word category: the directory scan assigned it to names ending in ".docx" or ".doc".
WORD,
// Legacy binary Word file: name ends with ".doc".
DOC,
// OOXML Word file: name ends with ".docx".
DOCX,
// Name ends with ".pdf".
PDF,
// NOTE(review): presumably covers image files (.jpg/.png/.jpeg/.tif) — the assignment is not visible here; confirm.
JPG
}

@ -1,15 +1,15 @@
package com.docus.sw.word;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
public class GetPicsDocx {
public static void main(String[] args) {
String path = "E:\\上海项目测试\\文档\\35.docx";
@ -32,4 +32,35 @@ public class GetPicsDocx {
e.printStackTrace();
}
}
/**
 * Writes every picture embedded in a {@code .docx} document to disk.
 *
 * <p>When {@code toPath} names an existing directory the pictures are created
 * inside that directory; otherwise {@code toPath} is treated as a plain
 * file-name prefix. Each output file is named
 * {@code <counter><picture file name>}, with the counter starting at 1.
 *
 * <p>I/O failures are reported through {@code printStackTrace()} and are not
 * rethrown, so a failure leaves whatever pictures were already written.
 *
 * @param fromPath path of the {@code .docx} file to read
 * @param toPath   target directory, or a file-name prefix
 */
public static void getPics(String fromPath, String toPath) {
    File target = new File(toPath);
    // Decide once whether toPath is a directory; plain string concatenation
    // would otherwise create the files next to the directory, not inside it.
    boolean intoDirectory = target.isDirectory();
    try (FileInputStream fis = new FileInputStream(new File(fromPath));
         XWPFDocument document = new XWPFDocument(fis)) {
        List<XWPFPictureData> picList = document.getAllPictures();
        int i = 1;
        for (XWPFPictureData pic : picList) {
            String name = i + pic.getFileName();
            File outFile = intoDirectory ? new File(target, name) : new File(toPath + name);
            // Close each output stream right away so handles are not leaked per picture.
            try (FileOutputStream fos = new FileOutputStream(outFile)) {
                fos.write(pic.getData());
            }
            i++;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}

@ -1,38 +1,40 @@
package com.docus.sw.word;
import java.io.*;
import java.util.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.UUID;
public class ReadImgDoc {
/**
 * Manual entry point: runs picture extraction against a fixed sample document.
 *
 * @param args ignored
 * @throws Exception whatever the extraction propagates
 */
public static void main(String[] args) throws Exception {
    final String sampleDoc = "C:\\Users\\zhanghai\\Desktop\\桌面\\test\\a.doc";
    ReadImgDoc reader = new ReadImgDoc();
    reader.readPicture(sampleDoc);
}
private void readPicture(String path)throws Exception{
FileInputStream in=new FileInputStream(new File(path));
HWPFDocument doc=new HWPFDocument(in);
int length=doc.characterLength();
PicturesTable pTable=doc.getPicturesTable();
private void readPicture(String path) throws Exception {
FileInputStream in = new FileInputStream(new File(path));
HWPFDocument doc = new HWPFDocument(in);
int length = doc.characterLength();
PicturesTable pTable = doc.getPicturesTable();
// int TitleLength=doc.getSummaryInformation().getTitle().length();
// System.out.println(TitleLength);
// System.out.println(length);
for (int i=0;i<length;i++){
Range range=new Range(i, i+1,doc);
CharacterRun cr=range.getCharacterRun(0);
if(pTable.hasPicture(cr)){
Picture pic=pTable.extractPicture(cr, false);
String afileName=pic.suggestFullFileName();
OutputStream out=new FileOutputStream(new File("C:\\Users\\zhanghai\\Desktop\\桌面\\test\\"+UUID.randomUUID()+afileName));
for (int i = 0; i < length; i++) {
Range range = new Range(i, i + 1, doc);
CharacterRun cr = range.getCharacterRun(0);
if (pTable.hasPicture(cr)) {
Picture pic = pTable.extractPicture(cr, false);
String afileName = pic.suggestFullFileName();
OutputStream out = new FileOutputStream(new File("C:\\Users\\zhanghai\\Desktop\\桌面\\test\\" + UUID.randomUUID() + afileName));
pic.writeImageContent(out);
}
@ -40,4 +42,28 @@ public class ReadImgDoc {
}
/**
 * Extracts the pictures attached to character runs of a legacy {@code .doc}
 * file and writes them to disk.
 *
 * <p>When {@code toPath} names an existing directory the pictures are created
 * inside that directory; otherwise {@code toPath} is treated as a plain
 * file-name prefix. Each output file is named
 * {@code <character index><suggested file name>}.
 *
 * @param path   path of the {@code .doc} file to read
 * @param toPath target directory, or a file-name prefix
 * @throws Exception if the document cannot be read or a picture cannot be written
 */
private static void readPicture(String path, String toPath) throws Exception {
    File target = new File(toPath);
    // Decide once whether toPath is a directory; plain string concatenation
    // would otherwise create the files next to the directory, not inside it.
    boolean intoDirectory = target.isDirectory();
    try (FileInputStream in = new FileInputStream(new File(path));
         HWPFDocument doc = new HWPFDocument(in)) {
        int length = doc.characterLength();
        PicturesTable pTable = doc.getPicturesTable();
        // Probe one character at a time and ask the pictures table whether
        // that character's run carries a picture.
        for (int i = 0; i < length; i++) {
            Range range = new Range(i, i + 1, doc);
            CharacterRun cr = range.getCharacterRun(0);
            if (!pTable.hasPicture(cr)) {
                continue;
            }
            Picture pic = pTable.extractPicture(cr, false);
            String name = i + pic.suggestFullFileName();
            File outFile = intoDirectory ? new File(target, name) : new File(toPath + name);
            // Close each output stream right away so handles are not leaked per picture.
            try (OutputStream out = new FileOutputStream(outFile)) {
                pic.writeImageContent(out);
            }
        }
    }
}
}
Loading…
Cancel
Save