|
@@ -0,0 +1,192 @@
|
|
|
|
|
+package com.xjrsoft.xjrsoftboot;
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+import org.apache.poi.hwpf.HWPFDocument;
|
|
|
|
|
+import org.apache.poi.hwpf.extractor.WordExtractor;
|
|
|
|
|
+import org.apache.poi.xwpf.usermodel.*;
|
|
|
|
|
+import org.junit.jupiter.api.Test;
|
|
|
|
|
+
|
|
|
|
|
+import java.io.*;
|
|
|
|
|
+import java.util.HashMap;
|
|
|
|
|
+import java.util.List;
|
|
|
|
|
+import java.util.Map;
|
|
|
|
|
+
|
|
|
|
|
+public class WordTest {
|
|
|
|
|
+ @Test
|
|
|
|
|
+ public void WordTest() throws IOException {
|
|
|
|
|
+ InputStream is = new FileInputStream("/Users/fanxp/Downloads/2023年春期 班级总课程表.docx");
|
|
|
|
|
+ XWPFDocument doc = new XWPFDocument(is);
|
|
|
|
|
+
|
|
|
|
|
+ List<XWPFParagraph> paras = doc.getParagraphs(); //将得到包含段落列表
|
|
|
|
|
+ //获取标题
|
|
|
|
|
+ for (XWPFParagraph para : paras) {
|
|
|
|
|
+// System.out.println(para.getText());
|
|
|
|
|
+
|
|
|
|
|
+ //获取文档中所有的表格
|
|
|
|
|
+ List<XWPFTable> tables = doc.getTables();
|
|
|
|
|
+
|
|
|
|
|
+ List<XWPFTableRow> rows;
|
|
|
|
|
+
|
|
|
|
|
+ List<XWPFTableCell> cells;
|
|
|
|
|
+
|
|
|
|
|
+ for (XWPFTable table : tables) {
|
|
|
|
|
+ rows = table.getRows();
|
|
|
|
|
+ for (XWPFTableRow row : rows) {
|
|
|
|
|
+ //获取行对应的单元格
|
|
|
|
|
+ cells = row.getTableCells();
|
|
|
|
|
+ for (XWPFTableCell cell : cells) {
|
|
|
|
|
+ System.out.println(cell.getText());;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ close(is);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+
|
|
|
|
|
+ * 关闭输入流
|
|
|
|
|
+
|
|
|
|
|
+ * @param is
|
|
|
|
|
+
|
|
|
|
|
+ */
|
|
|
|
|
+
|
|
|
|
|
+ private void close(InputStream is) {
|
|
|
|
|
+ if (is != null) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ is.close();
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ e.printStackTrace();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 获取正文文件内容,docx方法
|
|
|
|
|
+ *
|
|
|
|
|
+ * @param path
|
|
|
|
|
+ * @return
|
|
|
|
|
+ */
|
|
|
|
|
+ public Map<String, String> getContentDocx(String path) {
|
|
|
|
|
+ Map<String, String> map = new HashMap();
|
|
|
|
|
+ StringBuffer content = new StringBuffer("");
|
|
|
|
|
+ String result = "0"; // 0表示获取正常,1表示获取异常
|
|
|
|
|
+ InputStream is = null;
|
|
|
|
|
+ try {
|
|
|
|
|
+ is = new FileInputStream(new File(path));
|
|
|
|
|
+ // 2007版本的word
|
|
|
|
|
+ XWPFDocument xwpf = new XWPFDocument(is); // 2007版本,仅支持docx文件处理
|
|
|
|
|
+ List<XWPFParagraph> paragraphs = xwpf.getParagraphs();
|
|
|
|
|
+ if (paragraphs != null && paragraphs.size() > 0) {
|
|
|
|
|
+ for (XWPFParagraph paragraph : paragraphs) {
|
|
|
|
|
+ if (!paragraph.getParagraphText().startsWith(" ")) {
|
|
|
|
|
+ content.append(" ").append(paragraph.getParagraphText().trim()).append("\r\n");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ content.append(paragraph.getParagraphText());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ System.out.println("docx解析正文异常:" + e);
|
|
|
|
|
+ result = "1"; // 出现异常
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ if (is != null) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ is.close();
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ System.out.println("" + e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ map.put("result", result);
|
|
|
|
|
+ map.put("content", content.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ return map;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 获取正文文件内容,doc方法
|
|
|
|
|
+ *
|
|
|
|
|
+ * @param path
|
|
|
|
|
+ * @return
|
|
|
|
|
+ */
|
|
|
|
|
+ public Map<String, String> getContentDoc(String path) {
|
|
|
|
|
+ Map<String, String> map = new HashMap();
|
|
|
|
|
+ StringBuffer content = new StringBuffer("");
|
|
|
|
|
+ String result = "0"; // 0表示获取正常,1表示获取异常
|
|
|
|
|
+ InputStream is = null;
|
|
|
|
|
+ try {
|
|
|
|
|
+ is = new FileInputStream(new File(path));
|
|
|
|
|
+ // 2003版本的word
|
|
|
|
|
+ WordExtractor extractor = new WordExtractor(is); // 2003版本 仅doc格式文件可处理,docx文件不可处理
|
|
|
|
|
+ String[] paragraphText = extractor.getParagraphText(); // 获取段落,段落缩进无法获取,可以在前添加空格填充
|
|
|
|
|
+ if (paragraphText != null && paragraphText.length > 0) {
|
|
|
|
|
+ for (String paragraph : paragraphText) {
|
|
|
|
|
+ if (!paragraph.startsWith(" ")) {
|
|
|
|
|
+ content.append(" ").append(paragraph.trim()).append("\r\n");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ content.append(paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ System.out.println("doc解析正文异常:" + e);
|
|
|
|
|
+ result = "1"; // 出现异常
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ if (is != null) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ is.close();
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ System.out.println("" + e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ map.put("result", result);
|
|
|
|
|
+ map.put("content", content.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ return map;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 获取正文文件内容,wps方法
|
|
|
|
|
+ *
|
|
|
|
|
+ * @param path
|
|
|
|
|
+ * @return
|
|
|
|
|
+ */
|
|
|
|
|
+ public Map<String, String> getContentWps(String path) {
|
|
|
|
|
+ Map<String, String> map = new HashMap();
|
|
|
|
|
+ StringBuffer content = new StringBuffer("");
|
|
|
|
|
+ String result = "0"; // 0表示获取正常,1表示获取异常
|
|
|
|
|
+ InputStream is = null;
|
|
|
|
|
+ try {
|
|
|
|
|
+ is = new FileInputStream(new File(path));
|
|
|
|
|
+ // wps版本word
|
|
|
|
|
+ HWPFDocument hwpf = new HWPFDocument(is);
|
|
|
|
|
+ WordExtractor wordExtractor = new WordExtractor(hwpf);
|
|
|
|
|
+ // 文档文本内容
|
|
|
|
|
+ String[] paragraphText1 = wordExtractor.getParagraphText();
|
|
|
|
|
+ if (paragraphText1 != null && paragraphText1.length > 0) {
|
|
|
|
|
+ for (String paragraph : paragraphText1) {
|
|
|
|
|
+ if (!paragraph.startsWith(" ")) {
|
|
|
|
|
+ content.append(" ").append(paragraph.trim()).append("\r\n");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ content.append(paragraph);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ System.out.println("wps解析正文异常:" + e);
|
|
|
|
|
+ result = "1"; // 出现异常
|
|
|
|
|
+ } finally {
|
|
|
|
|
+ if (is != null) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ is.close();
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ System.out.println("" + e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ map.put("result", result);
|
|
|
|
|
+ map.put("content", content.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ return map;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+}
|