|
@@ -1,15 +1,16 @@
|
|
|
package com.xjrsoft.xjrsoftboot;
|
|
|
|
|
|
|
|
|
+import com.xjrsoft.common.enums.TimeNumberEnum;
|
|
|
+import com.xjrsoft.common.enums.TimePeriodEnum;
|
|
|
+import com.xjrsoft.module.courseTable.dto.CourseTableParse;
|
|
|
import org.apache.poi.hwpf.HWPFDocument;
|
|
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
|
|
import org.apache.poi.xwpf.usermodel.*;
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
|
|
import java.io.*;
|
|
|
-import java.util.HashMap;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
+import java.util.*;
|
|
|
|
|
|
public class WordTest {
|
|
|
@Test
|
|
@@ -21,174 +22,132 @@ public class WordTest {
|
|
|
//获取标题
|
|
|
for (XWPFParagraph para : paras) {
|
|
|
System.out.println(para.getText());
|
|
|
+ System.out.println("--------------------");
|
|
|
+ }
|
|
|
+ //获取文档中所有的表格
|
|
|
+ List<XWPFTable> tables = doc.getTables();
|
|
|
|
|
|
- //获取文档中所有的表格
|
|
|
- List<XWPFTable> tables = doc.getTables();
|
|
|
-
|
|
|
- List<XWPFTableRow> rows;
|
|
|
+ List<XWPFTableRow> rows;
|
|
|
|
|
|
- List<XWPFTableCell> cells;
|
|
|
+ List<XWPFTableCell> cells;
|
|
|
|
|
|
- for (XWPFTable table : tables) {
|
|
|
- rows = table.getRows();
|
|
|
- for (XWPFTableRow row : rows) {
|
|
|
- //获取行对应的单元格
|
|
|
- cells = row.getTableCells();
|
|
|
- for (XWPFTableCell cell : cells) {
|
|
|
- System.out.println(cell.getText());
|
|
|
- }
|
|
|
+ for (XWPFTable table : tables) {
|
|
|
+ rows = table.getRows();
|
|
|
+ for (XWPFTableRow row : rows) {
|
|
|
+ //获取行对应的单元格
|
|
|
+ cells = row.getTableCells();
|
|
|
+ for (XWPFTableCell cell : cells) {
|
|
|
+ System.out.println(cell.getText());
|
|
|
}
|
|
|
- break;
|
|
|
+ System.out.println("--------------------");
|
|
|
}
|
|
|
break;
|
|
|
}
|
|
|
close(is);
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
+ @Test
|
|
|
+ public void WordTest1() throws IOException {
|
|
|
+ InputStream is = new FileInputStream("/Users/fanxp/Downloads/2023年春期 班级总课程表.docx");
|
|
|
+ List<CourseTableParse> result = courseTableParses(is);
|
|
|
|
|
|
- * 关闭输入流
|
|
|
+ System.out.println(result);
|
|
|
+ }
|
|
|
|
|
|
- * @param is
|
|
|
|
|
|
- */
|
|
|
+ public List<CourseTableParse> courseTableParses(InputStream inputStream) throws IOException {
|
|
|
+ XWPFDocument doc = new XWPFDocument(inputStream);
|
|
|
|
|
|
- private void close(InputStream is) {
|
|
|
- if (is != null) {
|
|
|
- try {
|
|
|
- is.close();
|
|
|
- } catch (IOException e) {
|
|
|
- e.printStackTrace();
|
|
|
+ List<XWPFParagraph> paras = doc.getParagraphs();
|
|
|
+ String semester = null;
|
|
|
+ List<String> cNames = new ArrayList<>();
|
|
|
+ //获取标题
|
|
|
+ for (int i = 0; i < paras.size(); i++) {
|
|
|
+ String txt = paras.get(i).getText();
|
|
|
+ if (i == 0) {
|
|
|
+ semester = txt;
|
|
|
+ continue;
|
|
|
}
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 获取正文文件内容,docx方法
|
|
|
- *
|
|
|
- * @param path
|
|
|
- * @return
|
|
|
- */
|
|
|
- public Map<String, String> getContentDocx(String path) {
|
|
|
- Map<String, String> map = new HashMap();
|
|
|
- StringBuffer content = new StringBuffer("");
|
|
|
- String result = "0"; // 0表示获取正常,1表示获取异常
|
|
|
- InputStream is = null;
|
|
|
- try {
|
|
|
- is = new FileInputStream(new File(path));
|
|
|
- // 2007版本的word
|
|
|
- XWPFDocument xwpf = new XWPFDocument(is); // 2007版本,仅支持docx文件处理
|
|
|
- List<XWPFParagraph> paragraphs = xwpf.getParagraphs();
|
|
|
- if (paragraphs != null && paragraphs.size() > 0) {
|
|
|
- for (XWPFParagraph paragraph : paragraphs) {
|
|
|
- if (!paragraph.getParagraphText().startsWith(" ")) {
|
|
|
- content.append(" ").append(paragraph.getParagraphText().trim()).append("\r\n");
|
|
|
- } else {
|
|
|
- content.append(paragraph.getParagraphText());
|
|
|
- }
|
|
|
- }
|
|
|
+ txt = txt.replaceAll("总课程表", "").replace("\n", "").trim();
|
|
|
+ if (txt != null && !txt.equals("") && !txt.equals(semester)) {
|
|
|
+ cNames.add(txt);
|
|
|
}
|
|
|
- } catch (Exception e) {
|
|
|
- System.out.println("docx解析正文异常:" + e);
|
|
|
- result = "1"; // 出现异常
|
|
|
- } finally {
|
|
|
- if (is != null) {
|
|
|
- try {
|
|
|
- is.close();
|
|
|
- } catch (IOException e) {
|
|
|
- System.out.println("" + e);
|
|
|
- }
|
|
|
- }
|
|
|
- map.put("result", result);
|
|
|
- map.put("content", content.toString());
|
|
|
}
|
|
|
- return map;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 获取正文文件内容,doc方法
|
|
|
- *
|
|
|
- * @param path
|
|
|
- * @return
|
|
|
- */
|
|
|
- public Map<String, String> getContentDoc(String path) {
|
|
|
- Map<String, String> map = new HashMap();
|
|
|
- StringBuffer content = new StringBuffer("");
|
|
|
- String result = "0"; // 0表示获取正常,1表示获取异常
|
|
|
- InputStream is = null;
|
|
|
- try {
|
|
|
- is = new FileInputStream(new File(path));
|
|
|
- // 2003版本的word
|
|
|
- WordExtractor extractor = new WordExtractor(is); // 2003版本 仅doc格式文件可处理,docx文件不可处理
|
|
|
- String[] paragraphText = extractor.getParagraphText(); // 获取段落,段落缩进无法获取,可以在前添加空格填充
|
|
|
- if (paragraphText != null && paragraphText.length > 0) {
|
|
|
- for (String paragraph : paragraphText) {
|
|
|
- if (!paragraph.startsWith(" ")) {
|
|
|
- content.append(" ").append(paragraph.trim()).append("\r\n");
|
|
|
- } else {
|
|
|
- content.append(paragraph);
|
|
|
+ List<CourseTableParse> result = new ArrayList<>();
|
|
|
+ //获取文档中所有的表格
|
|
|
+ List<XWPFTable> tables = doc.getTables();
|
|
|
+ int tNum = 0;
|
|
|
+ for (XWPFTable table : tables) {
|
|
|
+ int rNum = 0;
|
|
|
+ String timePeriod = null;
|
|
|
+ List<String> weeks = new ArrayList<>();
|
|
|
+ List<XWPFTableRow> rows = table.getRows();
|
|
|
+ for (XWPFTableRow row : rows) {
|
|
|
+ //获取行对应的单元格
|
|
|
+ List<XWPFTableCell> cells = row.getTableCells();
|
|
|
+ String timeNumber = null;
|
|
|
+ for (int i = 0; i < cells.size(); i++) {
|
|
|
+ String cellText = cells.get(i).getText();
|
|
|
+ if (cellText.equals("") || rNum < 1) continue;
|
|
|
+
|
|
|
+ if (rNum == 1) {
|
|
|
+ weeks.add(cellText);
|
|
|
+ continue;
|
|
|
}
|
|
|
+ if (i == 0) {
|
|
|
+ timePeriod = cellText;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (i == 1) {
|
|
|
+ timeNumber = cellText;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ List<XWPFParagraph> cParagraph = cells.get(i).getParagraphs();
|
|
|
+ CourseTableParse item = new CourseTableParse();
|
|
|
+ item.setSemester(semester);
|
|
|
+ item.setTimePeriod(TimePeriodEnum.getCode(timePeriod));
|
|
|
+ item.setTimeNumber(TimeNumberEnum.getCode(timeNumber));
|
|
|
+ String week = weeks.get(Math.max(i - 2, 0));
|
|
|
+ item.setWeeksCn(week);
|
|
|
+ item.setClassName(cNames.get(tNum));
|
|
|
+ for (int j = 0; j < cParagraph.size(); j++) {
|
|
|
+ cellText = cParagraph.get(j).getText().trim();
|
|
|
+ switch (j) {
|
|
|
+ case 0:
|
|
|
+ item.setCourseName(cellText);
|
|
|
+ break;
|
|
|
+ case 1:
|
|
|
+ item.setTeacherName(cellText);
|
|
|
+ break;
|
|
|
+ case 2:
|
|
|
+ item.setSiteName(cellText);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ result.add(item);
|
|
|
+ System.out.println(item);
|
|
|
}
|
|
|
+ rNum++;
|
|
|
}
|
|
|
- } catch (Exception e) {
|
|
|
- System.out.println("doc解析正文异常:" + e);
|
|
|
- result = "1"; // 出现异常
|
|
|
- } finally {
|
|
|
- if (is != null) {
|
|
|
- try {
|
|
|
- is.close();
|
|
|
- } catch (IOException e) {
|
|
|
- System.out.println("" + e);
|
|
|
- }
|
|
|
- }
|
|
|
- map.put("result", result);
|
|
|
- map.put("content", content.toString());
|
|
|
+ tNum++;
|
|
|
}
|
|
|
- return map;
|
|
|
+ inputStream.close();
|
|
|
+ return result;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 获取正文文件内容,wps方法
|
|
|
+ * 关闭输入流
|
|
|
*
|
|
|
- * @param path
|
|
|
- * @return
|
|
|
+ * @param is
|
|
|
*/
|
|
|
- public Map<String, String> getContentWps(String path) {
|
|
|
- Map<String, String> map = new HashMap();
|
|
|
- StringBuffer content = new StringBuffer("");
|
|
|
- String result = "0"; // 0表示获取正常,1表示获取异常
|
|
|
- InputStream is = null;
|
|
|
- try {
|
|
|
- is = new FileInputStream(new File(path));
|
|
|
- // wps版本word
|
|
|
- HWPFDocument hwpf = new HWPFDocument(is);
|
|
|
- WordExtractor wordExtractor = new WordExtractor(hwpf);
|
|
|
- // 文档文本内容
|
|
|
- String[] paragraphText1 = wordExtractor.getParagraphText();
|
|
|
- if (paragraphText1 != null && paragraphText1.length > 0) {
|
|
|
- for (String paragraph : paragraphText1) {
|
|
|
- if (!paragraph.startsWith(" ")) {
|
|
|
- content.append(" ").append(paragraph.trim()).append("\r\n");
|
|
|
- } else {
|
|
|
- content.append(paragraph);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- System.out.println("wps解析正文异常:" + e);
|
|
|
- result = "1"; // 出现异常
|
|
|
- } finally {
|
|
|
- if (is != null) {
|
|
|
- try {
|
|
|
- is.close();
|
|
|
- } catch (IOException e) {
|
|
|
- System.out.println("" + e);
|
|
|
- }
|
|
|
+
|
|
|
+ private void close(InputStream is) {
|
|
|
+ if (is != null) {
|
|
|
+ try {
|
|
|
+ is.close();
|
|
|
+ } catch (IOException e) {
|
|
|
+ e.printStackTrace();
|
|
|
}
|
|
|
- map.put("result", result);
|
|
|
- map.put("content", content.toString());
|
|
|
}
|
|
|
- return map;
|
|
|
}
|
|
|
-
|
|
|
}
|