// WordToPdfConverter.java (7.99 KB) - authored by 徐俊 (xujun), commit 7d44a1f7
package com.yiboshi.science.utils;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Utility that renders the plain text of a Word document (.doc or .docx) into a PDF.
 *
 * <p>Only text is carried over: each paragraph is written with a single bundled font at a
 * fixed size. Formatting, tables and images of the source document are not reproduced.
 * All public conversion methods report failure by returning {@code false} and logging the
 * cause; they do not throw.
 */
public class WordToPdfConverter {
    private static final Logger logger = LoggerFactory.getLogger(WordToPdfConverter.class);

    /** Classpath location of the font used for every line of output. */
    private static final String FONT_RESOURCE = "/fonts/simsun.ttc";
    /** Font size, in points, of the rendered text. */
    private static final float FONT_SIZE = 12f;
    /** Vertical distance, in points, between two consecutive baselines. */
    private static final float LINE_HEIGHT = 15f;
    /** Margin, in points, kept free on every side of a page. */
    private static final float PAGE_MARGIN = 50f;
    /** Connect timeout, in milliseconds, for remote downloads. */
    private static final int CONNECT_TIMEOUT_MS = 5000;
    /** Read timeout, in milliseconds, for remote downloads. */
    private static final int READ_TIMEOUT_MS = 5000;
    /** Leading bytes of an OLE2 compound file, the container used by legacy .doc files. */
    private static final byte[] OLE2_SIGNATURE = {
        (byte) 0xD0, (byte) 0xCF, (byte) 0x11, (byte) 0xE0,
        (byte) 0xA1, (byte) 0xB1, (byte) 0x1A, (byte) 0xE1
    };

    /**
     * Converts a Word document on disk to a PDF file.
     *
     * @param wordPath path of the source document; must end in .doc or .docx (case-insensitive)
     * @param pdfPath  path of the PDF to write; missing parent directories are created
     * @return {@code true} if the PDF was written, {@code false} on any failure
     */
    public static boolean convertToPdf(String wordPath, String pdfPath) {
        if (wordPath == null || pdfPath == null || pdfPath.trim().isEmpty()) {
            logger.error("Word文件路径或PDF输出路径为空");
            return false;
        }

        File wordFile = new File(wordPath);
        if (!wordFile.exists()) {
            logger.error("Word文件不存在: {}", wordPath);
            return false;
        }

        if (!ensureParentDirectory(pdfPath)) {
            return false;
        }

        try {
            String lowerPath = wordPath.toLowerCase(Locale.ROOT);
            if (lowerPath.endsWith(".docx")) {
                return convertDocxToPdf(wordPath, pdfPath);
            } else if (lowerPath.endsWith(".doc")) {
                return convertDocToPdf(wordPath, pdfPath);
            } else {
                logger.error("不支持的文件格式: {}", wordPath);
                return false;
            }
        } catch (Exception e) {
            // Boundary catch: parsing and rendering failures are reported as "false".
            logger.error("Word转PDF失败", e);
            return false;
        }
    }

    /**
     * Converts an in-memory Word document to a PDF file.
     *
     * <p>The bytes are written to a temporary file whose extension is chosen from the
     * content (OLE2 signature means .doc, anything else is treated as .docx), converted,
     * and the temporary file is removed afterwards even when the conversion fails.
     *
     * @param wordBytes content of the Word document
     * @param pdfPath   path of the PDF to write
     * @return {@code true} if the PDF was written, {@code false} on any failure
     */
    public static boolean convertToPdf(byte[] wordBytes, String pdfPath) {
        if (wordBytes == null || wordBytes.length == 0) {
            logger.error("Word文档字节数组为空");
            return false;
        }
        if (pdfPath == null || pdfPath.trim().isEmpty()) {
            logger.error("PDF输出路径为空");
            return false;
        }

        File tempFile = null;
        try {
            tempFile = File.createTempFile("temp", detectExtension(wordBytes));
            try (FileOutputStream fos = new FileOutputStream(tempFile)) {
                fos.write(wordBytes);
            }
            return convertToPdf(tempFile.getAbsolutePath(), pdfPath);
        } catch (Exception e) {
            logger.error("Word转PDF失败", e);
            return false;
        } finally {
            if (tempFile != null && tempFile.exists() && !tempFile.delete()) {
                logger.warn("临时文件删除失败: {}", tempFile.getAbsolutePath());
            }
        }
    }

    /**
     * Renders the paragraphs of a .docx file into a PDF.
     *
     * @param docxPath path of the source .docx file
     * @param pdfPath  path of the PDF to write
     * @return always {@code true}; failures surface as exceptions
     * @throws IOException if the document cannot be read or the PDF cannot be written
     */
    private static boolean convertDocxToPdf(String docxPath, String pdfPath) throws IOException {
        List<String> lines = new ArrayList<>();
        try (FileInputStream fis = new FileInputStream(docxPath);
             XWPFDocument document = new XWPFDocument(fis)) {
            for (XWPFParagraph para : document.getParagraphs()) {
                lines.addAll(toPrintableLines(para.getText()));
            }
        }
        writeLinesToPdf(lines, pdfPath);
        return true;
    }

    /**
     * Renders the text of a legacy .doc file into a PDF.
     *
     * @param docPath path of the source .doc file
     * @param pdfPath path of the PDF to write
     * @return always {@code true}; failures surface as exceptions
     * @throws IOException if the document cannot be read or the PDF cannot be written
     */
    private static boolean convertDocToPdf(String docPath, String pdfPath) throws IOException {
        List<String> lines;
        try (FileInputStream fis = new FileInputStream(docPath);
             HWPFDocument document = new HWPFDocument(fis)) {
            // The raw text separates paragraphs with control characters, so it has to be
            // split into lines before it can be handed to showText.
            lines = toPrintableLines(document.getDocumentText());
        }
        writeLinesToPdf(lines, pdfPath);
        return true;
    }

    /**
     * Tells whether a file name carries a Word extension.
     *
     * @param fileName file name or path to inspect; may be {@code null}
     * @return {@code true} if the name ends in .doc or .docx (case-insensitive)
     */
    public static boolean isWordFile(String fileName) {
        if (fileName == null || fileName.trim().isEmpty()) {
            return false;
        }
        String lowerFileName = fileName.toLowerCase(Locale.ROOT);
        return lowerFileName.endsWith(".doc") || lowerFileName.endsWith(".docx");
    }

    /**
     * Downloads a Word document over HTTP(S) and converts it to a PDF file.
     *
     * <p>NOTE(review): the URL is fetched exactly as given. If it can originate from
     * untrusted input, the caller should restrict the allowed schemes and hosts.
     *
     * @param wordUrl URL of the Word document
     * @param pdfPath path of the PDF to write
     * @return {@code true} if the PDF was written, {@code false} on any failure
     */
    public static boolean convertUrlToPdf(String wordUrl, String pdfPath) {
        if (wordUrl == null || wordUrl.trim().isEmpty()) {
            logger.error("Word文档URL为空");
            return false;
        }

        HttpURLConnection conn = null;
        try {
            conn = (HttpURLConnection) new URL(wordUrl).openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
            conn.setReadTimeout(READ_TIMEOUT_MS);

            int status = conn.getResponseCode();
            if (status != HttpURLConnection.HTTP_OK) {
                logger.error("下载Word文档失败,HTTP响应码: {}", status);
                return false;
            }

            byte[] wordBytes;
            try (InputStream inputStream = conn.getInputStream();
                 ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, bytesRead);
                }
                wordBytes = outputStream.toByteArray();
            }

            return convertToPdf(wordBytes, pdfPath);
        } catch (Exception e) {
            logger.error("从URL下载Word文档失败: {}", wordUrl, e);
            return false;
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /** Creates the parent directory of {@code pdfPath} if needed; returns false when that fails. */
    private static boolean ensureParentDirectory(String pdfPath) {
        // Resolve against the working directory first: a bare file name has no parent otherwise.
        File parentDir = new File(pdfPath).getAbsoluteFile().getParentFile();
        if (parentDir == null || parentDir.isDirectory()) {
            return true;
        }
        // Re-check after mkdirs: another thread or process may have created it meanwhile.
        if (parentDir.mkdirs() || parentDir.isDirectory()) {
            return true;
        }
        logger.error("无法创建输出目录: {}", parentDir.getAbsolutePath());
        return false;
    }

    /** Picks the temp-file extension from the content: OLE2 signature means .doc, else .docx. */
    private static String detectExtension(byte[] wordBytes) {
        if (wordBytes.length < OLE2_SIGNATURE.length) {
            return ".docx";
        }
        for (int i = 0; i < OLE2_SIGNATURE.length; i++) {
            if (wordBytes[i] != OLE2_SIGNATURE[i]) {
                return ".docx";
            }
        }
        return ".doc";
    }

    /** Splits text at line-breaking characters and removes control characters showText rejects. */
    private static List<String> toPrintableLines(String text) {
        List<String> lines = new ArrayList<>();
        if (text == null) {
            return lines;
        }
        // CR, LF, CRLF, vertical tab and form feed all end a visual line.
        for (String raw : text.split("\\r\\n|[\\r\\n\\x0B\\f]")) {
            StringBuilder clean = new StringBuilder(raw.length());
            for (int i = 0; i < raw.length(); i++) {
                char c = raw.charAt(i);
                if (c == '\t') {
                    clean.append("    ");
                } else if (!Character.isISOControl(c)) {
                    clean.append(c);
                }
            }
            lines.add(clean.toString());
        }
        return lines;
    }

    /** Breaks one line into pieces that each fit within {@code maxWidth} points. */
    private static List<String> wrapLine(String line, PDType0Font font, float maxWidth)
            throws IOException {
        List<String> pieces = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        float currentWidth = 0f;
        // Break per code point rather than per word: CJK text has no spaces to break at.
        for (int i = 0; i < line.length(); ) {
            int codePoint = line.codePointAt(i);
            String glyph = new String(Character.toChars(codePoint));
            // getStringWidth reports 1/1000 of the font size.
            float glyphWidth = font.getStringWidth(glyph) / 1000f * FONT_SIZE;
            if (current.length() > 0 && currentWidth + glyphWidth > maxWidth) {
                pieces.add(current.toString());
                current.setLength(0);
                currentWidth = 0f;
            }
            current.append(glyph);
            currentWidth += glyphWidth;
            i += Character.charCount(codePoint);
        }
        // Also emits the empty string for an empty line, so blank paragraphs keep their spacing.
        pieces.add(current.toString());
        return pieces;
    }

    /** Loads the bundled font into {@code pdfDocument}, failing clearly if the resource is absent. */
    private static PDType0Font loadFont(PDDocument pdfDocument) throws IOException {
        // NOTE(review): the resource is a .ttc (TrueType collection); confirm that
        // PDType0Font.load accepts collections in the PDFBox version in use.
        try (InputStream fontStream =
                 WordToPdfConverter.class.getResourceAsStream(FONT_RESOURCE)) {
            if (fontStream == null) {
                throw new FileNotFoundException("字体资源不存在: " + FONT_RESOURCE);
            }
            return PDType0Font.load(pdfDocument, fontStream);
        }
    }

    /** Writes the lines to a new PDF at {@code pdfPath}, wrapping long lines and adding pages as needed. */
    private static void writeLinesToPdf(List<String> lines, String pdfPath) throws IOException {
        try (PDDocument pdfDocument = new PDDocument()) {
            PDType0Font font = loadFont(pdfDocument);

            PDPage page = new PDPage();
            float top = page.getMediaBox().getHeight() - PAGE_MARGIN;
            float usableWidth = page.getMediaBox().getWidth() - 2 * PAGE_MARGIN;
            // Number of baselines that fit between the top and bottom margins.
            int linesPerPage = Math.max(1, (int) ((top - PAGE_MARGIN) / LINE_HEIGHT) + 1);

            List<String> rendered = new ArrayList<>();
            for (String line : lines) {
                rendered.addAll(wrapLine(line, font, usableWidth));
            }

            int next = 0;
            // At least one page is always produced, even for an empty document.
            while (true) {
                pdfDocument.addPage(page);
                try (PDPageContentStream contentStream =
                         new PDPageContentStream(pdfDocument, page)) {
                    float y = top;
                    int end = Math.min(next + linesPerPage, rendered.size());
                    for (; next < end; next++) {
                        contentStream.beginText();
                        contentStream.setFont(font, FONT_SIZE);
                        contentStream.newLineAtOffset(PAGE_MARGIN, y);
                        contentStream.showText(rendered.get(next));
                        contentStream.endText();
                        y -= LINE_HEIGHT;
                    }
                }
                if (next >= rendered.size()) {
                    break;
                }
                page = new PDPage();
            }

            pdfDocument.save(pdfPath);
        }
    }
}