|
|
@ -2,6 +2,7 @@ package com.docus.sw.fenpan;
|
|
|
|
|
|
|
|
|
|
|
|
import com.alibaba.excel.util.FileUtils;
|
|
|
|
import com.alibaba.excel.util.FileUtils;
|
|
|
|
import com.docus.sw.Config;
|
|
|
|
import com.docus.sw.Config;
|
|
|
|
|
|
|
|
import com.docus.sw.word.GetPicsDocx;
|
|
|
|
import com.docus.sw.word.PdfBoxUtils;
|
|
|
|
import com.docus.sw.word.PdfBoxUtils;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
import org.apache.commons.imaging.ImageInfo;
|
|
|
|
import org.apache.commons.imaging.ImageInfo;
|
|
|
@ -102,10 +103,52 @@ public class FenpanService {
|
|
|
|
|
|
|
|
|
|
|
|
for (Pieces piece : pieces) {
|
|
|
|
for (Pieces piece : pieces) {
|
|
|
|
//根据文件类型
|
|
|
|
//根据文件类型
|
|
|
|
if (piece.getFileTypeEnum() == FileTypeEnum.WORD) {
|
|
|
|
if (piece.getFileTypeEnum() == FileTypeEnum.DOC) {
|
|
|
|
//从word 直接提取图片
|
|
|
|
//从word 直接提取图片
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
|
|
|
File file = new File("temp");
|
|
|
|
|
|
|
|
if(!file.exists()){
|
|
|
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
GetPicsDocx.getPics(piece.getAbsolutePath(),file.getAbsolutePath());
|
|
|
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
PdfBoxUtils.pdf2image(piece.getAbsolutePath(),file.getAbsolutePath());
|
|
|
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
|
|
|
for(File pdfImg:files){
|
|
|
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
log.error(e.getMessage(),e);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
|
|
|
file.delete();
|
|
|
|
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.DOCX) {
|
|
|
|
|
|
|
|
//从word 直接提取图片
|
|
|
|
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
|
|
|
|
File file = new File("temp");
|
|
|
|
|
|
|
|
if(!file.exists()){
|
|
|
|
|
|
|
|
file.mkdirs();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
GetPicsDocx.getPics(piece.getAbsolutePath(),file.getAbsolutePath());
|
|
|
|
|
|
|
|
List<Document> documentList = new ArrayList<>();
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
|
|
|
PdfBoxUtils.pdf2image(piece.getAbsolutePath(),file.getAbsolutePath());
|
|
|
|
|
|
|
|
File[] files = file.listFiles();
|
|
|
|
|
|
|
|
for(File pdfImg:files){
|
|
|
|
|
|
|
|
getDocumentList(documentList, pdfImg);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
|
|
|
log.error(e.getMessage(),e);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
piece.put(documentList);
|
|
|
|
|
|
|
|
//删除对应的temp 文件
|
|
|
|
|
|
|
|
file.delete();
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
|
|
|
|
} else if (piece.getFileTypeEnum() == FileTypeEnum.PDF) {
|
|
|
|
// 从pdf 提取图片,
|
|
|
|
// 从pdf 提取图片,
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
|
//提取图片为document ,然后用于后面判断
|
|
|
@ -247,8 +290,11 @@ public class FenpanService {
|
|
|
|
if (o.getName().endsWith(".pdf")) {
|
|
|
|
if (o.getName().endsWith(".pdf")) {
|
|
|
|
Pieces pieces = new Pieces(FileTypeEnum.PDF, o.getAbsolutePath(), o.getName());
|
|
|
|
Pieces pieces = new Pieces(FileTypeEnum.PDF, o.getAbsolutePath(), o.getName());
|
|
|
|
allDirectory.add(pieces);
|
|
|
|
allDirectory.add(pieces);
|
|
|
|
} else if (o.getName().endsWith(".docx") || o.getName().endsWith(".doc")) {
|
|
|
|
} else if (o.getName().endsWith(".docx") ) {
|
|
|
|
Pieces pieces = new Pieces(FileTypeEnum.WORD, o.getAbsolutePath(), o.getName());
|
|
|
|
Pieces pieces = new Pieces(FileTypeEnum.DOCX, o.getAbsolutePath(), o.getName());
|
|
|
|
|
|
|
|
allDirectory.add(pieces);
|
|
|
|
|
|
|
|
} else if ( o.getName().endsWith(".doc")) {
|
|
|
|
|
|
|
|
Pieces pieces = new Pieces(FileTypeEnum.DOC, o.getAbsolutePath(), o.getName());
|
|
|
|
allDirectory.add(pieces);
|
|
|
|
allDirectory.add(pieces);
|
|
|
|
} else if (o.getName().endsWith(".jpg") || o.getName().endsWith(".png")
|
|
|
|
} else if (o.getName().endsWith(".jpg") || o.getName().endsWith(".png")
|
|
|
|
|| o.getName().endsWith(".jpeg") || o.getName().endsWith(".tif")
|
|
|
|
|| o.getName().endsWith(".jpeg") || o.getName().endsWith(".tif")
|
|
|
|