Replace altChunk html to docx with direct parsing (manual)
This commit is contained in:
parent
ef5e6a92e0
commit
cb1d7b5340
|
@ -186,6 +186,12 @@
|
||||||
<artifactId>fop</artifactId>
|
<artifactId>fop</artifactId>
|
||||||
<version>2.3</version>
|
<version>2.3</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.jsoup</groupId>
|
||||||
|
<artifactId>jsoup</artifactId>
|
||||||
|
<version>1.14.3</version>
|
||||||
|
</dependency>
|
||||||
<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf -->
|
<!-- https://mvnrepository.com/artifact/fr.opensagres.xdocreport/org.apache.poi.xwpf.converter.pdf -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>fr.opensagres.xdocreport</groupId>
|
<groupId>fr.opensagres.xdocreport</groupId>
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
package eu.eudat.logic.utilities.documents.word;
|
||||||
|
|
||||||
|
import org.apache.poi.xwpf.usermodel.*;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.nodes.Node;
|
||||||
|
import org.jsoup.nodes.TextNode;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import org.jsoup.select.NodeTraversor;
|
||||||
|
import org.jsoup.select.NodeVisitor;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STHighlightColor;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class HtmlToWorldBuilder implements NodeVisitor {
|
||||||
|
|
||||||
|
private final Map<String, Boolean> properties = new LinkedHashMap<>();
|
||||||
|
private final XWPFParagraph paragraph;
|
||||||
|
private XWPFRun run;
|
||||||
|
private Boolean dumpRun;
|
||||||
|
private final String identation;
|
||||||
|
private Boolean isIdentationUsed;
|
||||||
|
|
||||||
|
public static void convert(XWPFDocument document, Document htmlDocument, String identation) {
|
||||||
|
Elements htmlParagraphs = htmlDocument.select("p");
|
||||||
|
if (!htmlParagraphs.isEmpty()) {
|
||||||
|
for (Element htmlParagraph : htmlParagraphs) {
|
||||||
|
XWPFParagraph paragraph = document.createParagraph();
|
||||||
|
HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation);
|
||||||
|
NodeTraversor.traverse(htmlToWorldBuilder, htmlParagraph);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
XWPFParagraph paragraph = document.createParagraph();
|
||||||
|
HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation);
|
||||||
|
NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public HtmlToWorldBuilder(XWPFParagraph paragraph, String identation) {
|
||||||
|
this.paragraph = paragraph;
|
||||||
|
this.run = this.paragraph.createRun();
|
||||||
|
this.dumpRun = false;
|
||||||
|
this.identation = identation;
|
||||||
|
this.isIdentationUsed = false;
|
||||||
|
this.run.setFontSize(11);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void head(Node node, int i) {
|
||||||
|
String name = node.nodeName();
|
||||||
|
if (name.equals("#text")) {
|
||||||
|
String text = !isIdentationUsed ? identation + ((TextNode)node).text() : ((TextNode)node).text();
|
||||||
|
this.run.setText(text);
|
||||||
|
this.isIdentationUsed = true;
|
||||||
|
this.dumpRun = true;
|
||||||
|
} else {
|
||||||
|
properties.put(name, true);
|
||||||
|
}
|
||||||
|
if (dumpRun) {
|
||||||
|
this.run = this.paragraph.createRun();
|
||||||
|
this.run.setFontSize(11);
|
||||||
|
this.dumpRun = false;
|
||||||
|
}
|
||||||
|
parseProperties();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseProperties() {
|
||||||
|
properties.entrySet().forEach(stringBooleanEntry -> {
|
||||||
|
switch (stringBooleanEntry.getKey()) {
|
||||||
|
case "i" :
|
||||||
|
case "em":
|
||||||
|
this.run.setItalic(stringBooleanEntry.getValue());
|
||||||
|
break;
|
||||||
|
case "b":
|
||||||
|
case "strong":
|
||||||
|
this.run.setBold(stringBooleanEntry.getValue());
|
||||||
|
break;
|
||||||
|
case "u":
|
||||||
|
case "ins":
|
||||||
|
this.run.setUnderline(stringBooleanEntry.getValue() ? UnderlinePatterns.SINGLE : UnderlinePatterns.NONE);
|
||||||
|
break;
|
||||||
|
case "small":
|
||||||
|
this.run.setFontSize(stringBooleanEntry.getValue() ? 8 : 11);
|
||||||
|
break;
|
||||||
|
case "del":
|
||||||
|
this.run.setStrikeThrough(stringBooleanEntry.getValue());
|
||||||
|
break;
|
||||||
|
case "mark":
|
||||||
|
this.run.setTextHighlightColor(stringBooleanEntry.getValue() ? STHighlightColor.YELLOW.toString() : STHighlightColor.NONE.toString());
|
||||||
|
break;
|
||||||
|
case "sub":
|
||||||
|
this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUBSCRIPT : VerticalAlign.BASELINE);
|
||||||
|
break;
|
||||||
|
case "sup":
|
||||||
|
this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUPERSCRIPT : VerticalAlign.BASELINE);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void tail(Node node, int i) {
|
||||||
|
String name = node.nodeName();
|
||||||
|
properties.put(name, false);
|
||||||
|
parseProperties();
|
||||||
|
}
|
||||||
|
}
|
|
@ -13,8 +13,9 @@ import eu.eudat.models.data.user.components.datasetprofile.FieldSet;
|
||||||
import eu.eudat.models.data.user.components.datasetprofile.Section;
|
import eu.eudat.models.data.user.components.datasetprofile.Section;
|
||||||
import eu.eudat.models.data.user.composite.DatasetProfilePage;
|
import eu.eudat.models.data.user.composite.DatasetProfilePage;
|
||||||
import eu.eudat.models.data.user.composite.PagedDatasetProfile;
|
import eu.eudat.models.data.user.composite.PagedDatasetProfile;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
|
||||||
import org.apache.poi.xwpf.usermodel.*;
|
import org.apache.poi.xwpf.usermodel.*;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDecimalNumber;
|
||||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTLvl;
|
||||||
|
@ -50,7 +51,7 @@ public class WordBuilder {
|
||||||
return paragraph;
|
return paragraph;
|
||||||
});
|
});
|
||||||
this.options.put(ParagraphStyle.HTML, (mainDocumentPart, item) -> {
|
this.options.put(ParagraphStyle.HTML, (mainDocumentPart, item) -> {
|
||||||
try {
|
/*try {
|
||||||
XWPFHtmlDocument xwpfHtmlDocument = XWPFHtmlDocument.addHtmlDocument(mainDocumentPart);
|
XWPFHtmlDocument xwpfHtmlDocument = XWPFHtmlDocument.addHtmlDocument(mainDocumentPart);
|
||||||
if (item != null) {
|
if (item != null) {
|
||||||
xwpfHtmlDocument.setHtml(item);
|
xwpfHtmlDocument.setHtml(item);
|
||||||
|
@ -60,7 +61,12 @@ public class WordBuilder {
|
||||||
logger.error(e.getLocalizedMessage(), e);
|
logger.error(e.getLocalizedMessage(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;*/
|
||||||
|
XWPFParagraph paragraph = null;
|
||||||
|
Document htmlDoc = Jsoup.parse(item);
|
||||||
|
HtmlToWorldBuilder.convert(mainDocumentPart, htmlDoc, " ");
|
||||||
|
|
||||||
|
return paragraph;
|
||||||
});
|
});
|
||||||
this.options.put(ParagraphStyle.TITLE, (mainDocumentPart, item) -> {
|
this.options.put(ParagraphStyle.TITLE, (mainDocumentPart, item) -> {
|
||||||
XWPFParagraph paragraph = mainDocumentPart.createParagraph();
|
XWPFParagraph paragraph = mainDocumentPart.createParagraph();
|
||||||
|
|
Loading…
Reference in New Issue