Replace altChunk html to docx with direct parsing (manual)
This commit is contained in:
parent
ef5e6a92e0
commit
cb1d7b5340
|
@ -186,6 +186,12 @@
|
|||
<artifactId>fop</artifactId>
|
||||
<version>2.3</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.14.3</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf -->
|
||||
<dependency>
|
||||
<groupId>fr.opensagres.xdocreport</groupId>
|
||||
|
|
|
@ -0,0 +1,109 @@
|
|||
package eu.eudat.logic.utilities.documents.word;
|
||||
|
||||
import org.apache.poi.xwpf.usermodel.*;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.nodes.Node;
|
||||
import org.jsoup.nodes.TextNode;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.jsoup.select.NodeTraversor;
|
||||
import org.jsoup.select.NodeVisitor;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STHighlightColor;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class HtmlToWorldBuilder implements NodeVisitor {
|
||||
|
||||
private final Map<String, Boolean> properties = new LinkedHashMap<>();
|
||||
private final XWPFParagraph paragraph;
|
||||
private XWPFRun run;
|
||||
private Boolean dumpRun;
|
||||
private final String identation;
|
||||
private Boolean isIdentationUsed;
|
||||
|
||||
public static void convert(XWPFDocument document, Document htmlDocument, String identation) {
|
||||
Elements htmlParagraphs = htmlDocument.select("p");
|
||||
if (!htmlParagraphs.isEmpty()) {
|
||||
for (Element htmlParagraph : htmlParagraphs) {
|
||||
XWPFParagraph paragraph = document.createParagraph();
|
||||
HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation);
|
||||
NodeTraversor.traverse(htmlToWorldBuilder, htmlParagraph);
|
||||
}
|
||||
} else {
|
||||
XWPFParagraph paragraph = document.createParagraph();
|
||||
HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation);
|
||||
NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument);
|
||||
}
|
||||
}
|
||||
|
||||
public HtmlToWorldBuilder(XWPFParagraph paragraph, String identation) {
|
||||
this.paragraph = paragraph;
|
||||
this.run = this.paragraph.createRun();
|
||||
this.dumpRun = false;
|
||||
this.identation = identation;
|
||||
this.isIdentationUsed = false;
|
||||
this.run.setFontSize(11);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void head(Node node, int i) {
|
||||
String name = node.nodeName();
|
||||
if (name.equals("#text")) {
|
||||
String text = !isIdentationUsed ? identation + ((TextNode)node).text() : ((TextNode)node).text();
|
||||
this.run.setText(text);
|
||||
this.isIdentationUsed = true;
|
||||
this.dumpRun = true;
|
||||
} else {
|
||||
properties.put(name, true);
|
||||
}
|
||||
if (dumpRun) {
|
||||
this.run = this.paragraph.createRun();
|
||||
this.run.setFontSize(11);
|
||||
this.dumpRun = false;
|
||||
}
|
||||
parseProperties();
|
||||
|
||||
}
|
||||
|
||||
private void parseProperties() {
|
||||
properties.entrySet().forEach(stringBooleanEntry -> {
|
||||
switch (stringBooleanEntry.getKey()) {
|
||||
case "i" :
|
||||
case "em":
|
||||
this.run.setItalic(stringBooleanEntry.getValue());
|
||||
break;
|
||||
case "b":
|
||||
case "strong":
|
||||
this.run.setBold(stringBooleanEntry.getValue());
|
||||
break;
|
||||
case "u":
|
||||
case "ins":
|
||||
this.run.setUnderline(stringBooleanEntry.getValue() ? UnderlinePatterns.SINGLE : UnderlinePatterns.NONE);
|
||||
break;
|
||||
case "small":
|
||||
this.run.setFontSize(stringBooleanEntry.getValue() ? 8 : 11);
|
||||
break;
|
||||
case "del":
|
||||
this.run.setStrikeThrough(stringBooleanEntry.getValue());
|
||||
break;
|
||||
case "mark":
|
||||
this.run.setTextHighlightColor(stringBooleanEntry.getValue() ? STHighlightColor.YELLOW.toString() : STHighlightColor.NONE.toString());
|
||||
break;
|
||||
case "sub":
|
||||
this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUBSCRIPT : VerticalAlign.BASELINE);
|
||||
break;
|
||||
case "sup":
|
||||
this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUPERSCRIPT : VerticalAlign.BASELINE);
|
||||
break;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tail(Node node, int i) {
|
||||
String name = node.nodeName();
|
||||
properties.put(name, false);
|
||||
parseProperties();
|
||||
}
|
||||
}
|
|
@ -13,8 +13,9 @@ import eu.eudat.models.data.user.components.datasetprofile.FieldSet;
|
|||
import eu.eudat.models.data.user.components.datasetprofile.Section;
|
||||
import eu.eudat.models.data.user.composite.DatasetProfilePage;
|
||||
import eu.eudat.models.data.user.composite.PagedDatasetProfile;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.xwpf.usermodel.*;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
|
||||
|
@ -50,7 +51,7 @@ public class WordBuilder {
|
|||
return paragraph;
|
||||
});
|
||||
this.options.put(ParagraphStyle.HTML, (mainDocumentPart, item) -> {
|
||||
try {
|
||||
/*try {
|
||||
XWPFHtmlDocument xwpfHtmlDocument = XWPFHtmlDocument.addHtmlDocument(mainDocumentPart);
|
||||
if (item != null) {
|
||||
xwpfHtmlDocument.setHtml(item);
|
||||
|
@ -60,7 +61,12 @@ public class WordBuilder {
|
|||
logger.error(e.getLocalizedMessage(), e);
|
||||
}
|
||||
|
||||
return null;
|
||||
return null;*/
|
||||
XWPFParagraph paragraph = null;
|
||||
Document htmlDoc = Jsoup.parse(item);
|
||||
HtmlToWorldBuilder.convert(mainDocumentPart, htmlDoc, " ");
|
||||
|
||||
return paragraph;
|
||||
});
|
||||
this.options.put(ParagraphStyle.TITLE, (mainDocumentPart, item) -> {
|
||||
XWPFParagraph paragraph = mainDocumentPart.createParagraph();
|
||||
|
|
Loading…
Reference in New Issue