Replace altChunk html to docx with direct parsing (manual)

This commit is contained in:
George Kalampokis 2021-10-06 15:15:41 +03:00
parent ef5e6a92e0
commit cb1d7b5340
3 changed files with 124 additions and 3 deletions

View File

@ -186,6 +186,12 @@
<artifactId>fop</artifactId>
<version>2.3</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>

View File

@ -0,0 +1,109 @@
package eu.eudat.logic.utilities.documents.word;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STHighlightColor;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Jsoup {@link NodeVisitor} that copies the text of an HTML fragment into an
 * {@link XWPFParagraph}, translating a small set of inline tags
 * (i/em, b/strong, u/ins, small, del, mark, sub, sup, br) into run formatting.
 *
 * <p>Formatting is derived from the stack of elements that are open at the moment a
 * text node is visited. Each text node gets its own run, so formatting never leaks
 * from one piece of text to the next, and closing an inner tag cannot switch off
 * formatting that an enclosing tag (or an alias of it) still requires.
 */
public class HtmlToWorldBuilder implements NodeVisitor {

    private static final int DEFAULT_FONT_SIZE = 11;
    private static final int SMALL_FONT_SIZE = 8;

    /** Names of the elements currently being traversed, innermost first. */
    private final Deque<String> openTags = new ArrayDeque<>();
    private final XWPFParagraph paragraph;
    private final String identation;
    private boolean isIdentationUsed;

    /**
     * Appends the content of {@code htmlDocument} to {@code document}.
     *
     * <p>Every {@code <p>} element becomes its own Word paragraph. When the HTML
     * contains no {@code <p>} at all, the whole document is written into a single
     * paragraph.
     *
     * @param document     target Word document; new paragraphs are appended to it
     * @param htmlDocument parsed HTML to convert
     * @param identation   text prepended to the first text node of every created
     *                     paragraph; {@code null} is treated as no indentation
     */
    public static void convert(XWPFDocument document, Document htmlDocument, String identation) {
        Elements htmlParagraphs = htmlDocument.select("p");
        if (htmlParagraphs.isEmpty()) {
            writeParagraph(document, htmlDocument, identation);
            return;
        }
        for (Element htmlParagraph : htmlParagraphs) {
            writeParagraph(document, htmlParagraph, identation);
        }
    }

    /** Creates one Word paragraph and fills it by traversing {@code root}. */
    private static void writeParagraph(XWPFDocument document, Node root, String identation) {
        XWPFParagraph paragraph = document.createParagraph();
        NodeTraversor.traverse(new HtmlToWorldBuilder(paragraph, identation), root);
    }

    /**
     * @param paragraph  paragraph that receives the generated runs
     * @param identation text prepended to the first text node written to the
     *                   paragraph; {@code null} is treated as an empty string
     */
    public HtmlToWorldBuilder(XWPFParagraph paragraph, String identation) {
        this.paragraph = paragraph;
        // Guard against null so the literal text "null" is never written to the document.
        this.identation = identation == null ? "" : identation;
        this.isIdentationUsed = false;
    }

    /**
     * Called when the traversal enters a node: text nodes are written as a new,
     * fully formatted run; elements are recorded as open.
     */
    @Override
    public void head(Node node, int i) {
        if (node instanceof TextNode) {
            String text = ((TextNode) node).text();
            if (!isIdentationUsed) {
                text = identation + text;
                isIdentationUsed = true;
            }
            XWPFRun run = paragraph.createRun();
            applyFormatting(run);
            run.setText(text);
        } else if (node instanceof Element) {
            String name = node.nodeName();
            openTags.push(name);
            if ("br".equals(name)) {
                // Keep explicit line breaks instead of gluing the surrounding text together.
                paragraph.createRun().addBreak();
            }
        }
    }

    /** Called when the traversal leaves a node: the element is no longer open. */
    @Override
    public void tail(Node node, int i) {
        if (node instanceof Element) {
            // pollFirst() rather than pop(): tolerate a tail() without a matching head().
            openTags.pollFirst();
        }
    }

    /** Applies to {@code run} the formatting implied by the currently open elements. */
    private void applyFormatting(XWPFRun run) {
        run.setFontSize(isOpen("small") ? SMALL_FONT_SIZE : DEFAULT_FONT_SIZE);
        if (isOpen("i", "em")) {
            run.setItalic(true);
        }
        if (isOpen("b", "strong")) {
            run.setBold(true);
        }
        if (isOpen("u", "ins")) {
            run.setUnderline(UnderlinePatterns.SINGLE);
        }
        if (isOpen("del")) {
            run.setStrikeThrough(true);
        }
        if (isOpen("mark")) {
            run.setTextHighlightColor(STHighlightColor.YELLOW.toString());
        }
        VerticalAlign verticalAlign = innermostVerticalAlign();
        if (verticalAlign != VerticalAlign.BASELINE) {
            // setSubscript takes the vertical alignment value, so it is used for superscript too.
            run.setSubscript(verticalAlign);
        }
    }

    /** Returns {@code true} when at least one of {@code tagNames} is currently open. */
    private boolean isOpen(String... tagNames) {
        for (String tagName : tagNames) {
            if (openTags.contains(tagName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the alignment requested by the innermost open {@code sub}/{@code sup}
     * element, or {@link VerticalAlign#BASELINE} when neither is open.
     */
    private VerticalAlign innermostVerticalAlign() {
        // The deque is filled with push(), so iteration starts at the innermost element.
        for (String tagName : openTags) {
            if ("sub".equals(tagName)) {
                return VerticalAlign.SUBSCRIPT;
            }
            if ("sup".equals(tagName)) {
                return VerticalAlign.SUPERSCRIPT;
            }
        }
        return VerticalAlign.BASELINE;
    }
}

View File

@ -13,8 +13,9 @@ import eu.eudat.models.data.user.components.datasetprofile.FieldSet;
import eu.eudat.models.data.user.components.datasetprofile.Section;
import eu.eudat.models.data.user.composite.DatasetProfilePage;
import eu.eudat.models.data.user.composite.PagedDatasetProfile;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
@ -50,7 +51,7 @@ public class WordBuilder {
return paragraph;
});
this.options.put(ParagraphStyle.HTML, (mainDocumentPart, item) -> {
try {
/*try {
XWPFHtmlDocument xwpfHtmlDocument = XWPFHtmlDocument.addHtmlDocument(mainDocumentPart);
if (item != null) {
xwpfHtmlDocument.setHtml(item);
@ -60,7 +61,12 @@ public class WordBuilder {
logger.error(e.getLocalizedMessage(), e);
}
return null;
return null;*/
XWPFParagraph paragraph = null;
Document htmlDoc = Jsoup.parse(item);
HtmlToWorldBuilder.convert(mainDocumentPart, htmlDoc, " ");
return paragraph;
});
this.options.put(ParagraphStyle.TITLE, (mainDocumentPart, item) -> {
XWPFParagraph paragraph = mainDocumentPart.createParagraph();