2021-10-06 14:15:41 +02:00
|
|
|
package eu.eudat.logic.utilities.documents.word;
|
|
|
|
|
|
|
|
import org.apache.poi.xwpf.usermodel.*;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import org.jsoup.nodes.Element;
|
|
|
|
import org.jsoup.nodes.Node;
|
|
|
|
import org.jsoup.nodes.TextNode;
|
|
|
|
import org.jsoup.select.Elements;
|
|
|
|
import org.jsoup.select.NodeTraversor;
|
|
|
|
import org.jsoup.select.NodeVisitor;
|
2021-10-26 15:21:28 +02:00
|
|
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
|
2021-10-06 14:15:41 +02:00
|
|
|
|
2021-10-26 15:21:28 +02:00
|
|
|
import java.math.BigInteger;
|
|
|
|
import java.util.*;
|
2021-10-06 14:15:41 +02:00
|
|
|
|
|
|
|
public class HtmlToWorldBuilder implements NodeVisitor {
|
|
|
|
|
|
|
|
private final Map<String, Boolean> properties = new LinkedHashMap<>();
|
2021-10-26 15:21:28 +02:00
|
|
|
private XWPFParagraph paragraph;
|
2021-10-06 14:15:41 +02:00
|
|
|
private XWPFRun run;
|
|
|
|
private Boolean dumpRun;
|
|
|
|
private final String identation;
|
|
|
|
private Boolean isIdentationUsed;
|
2021-10-26 15:21:28 +02:00
|
|
|
private XWPFNumbering numbering;
|
|
|
|
private Queue<BigInteger> abstractNumId;
|
|
|
|
private BigInteger numberingLevel;
|
2021-10-06 14:15:41 +02:00
|
|
|
|
|
|
|
public static void convert(XWPFDocument document, Document htmlDocument, String identation) {
|
2021-10-26 15:21:28 +02:00
|
|
|
XWPFParagraph paragraph = document.createParagraph();
|
|
|
|
HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation);
|
|
|
|
NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument);
|
2021-10-06 14:15:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public HtmlToWorldBuilder(XWPFParagraph paragraph, String identation) {
|
|
|
|
this.paragraph = paragraph;
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
this.dumpRun = false;
|
|
|
|
this.identation = identation;
|
|
|
|
this.isIdentationUsed = false;
|
|
|
|
this.run.setFontSize(11);
|
2021-10-26 15:21:28 +02:00
|
|
|
this.abstractNumId = new ArrayDeque<>();
|
|
|
|
this.numberingLevel = BigInteger.valueOf(-1);
|
2021-10-06 14:15:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void head(Node node, int i) {
|
|
|
|
String name = node.nodeName();
|
|
|
|
if (name.equals("#text")) {
|
|
|
|
String text = !isIdentationUsed ? identation + ((TextNode)node).text() : ((TextNode)node).text();
|
|
|
|
this.run.setText(text);
|
|
|
|
this.isIdentationUsed = true;
|
|
|
|
this.dumpRun = true;
|
|
|
|
} else {
|
|
|
|
properties.put(name, true);
|
|
|
|
}
|
|
|
|
if (dumpRun) {
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
this.run.setFontSize(11);
|
|
|
|
this.dumpRun = false;
|
|
|
|
}
|
2021-10-26 15:21:28 +02:00
|
|
|
parseProperties(node);
|
2021-10-13 16:47:50 +02:00
|
|
|
properties.clear();
|
2021-10-06 14:15:41 +02:00
|
|
|
}
|
|
|
|
|
2021-10-26 15:21:28 +02:00
|
|
|
private void parseProperties(Node node) {
|
2021-10-06 14:15:41 +02:00
|
|
|
properties.entrySet().forEach(stringBooleanEntry -> {
|
|
|
|
switch (stringBooleanEntry.getKey()) {
|
|
|
|
case "i" :
|
|
|
|
case "em":
|
|
|
|
this.run.setItalic(stringBooleanEntry.getValue());
|
|
|
|
break;
|
|
|
|
case "b":
|
|
|
|
case "strong":
|
|
|
|
this.run.setBold(stringBooleanEntry.getValue());
|
|
|
|
break;
|
|
|
|
case "u":
|
|
|
|
case "ins":
|
|
|
|
this.run.setUnderline(stringBooleanEntry.getValue() ? UnderlinePatterns.SINGLE : UnderlinePatterns.NONE);
|
|
|
|
break;
|
|
|
|
case "small":
|
|
|
|
this.run.setFontSize(stringBooleanEntry.getValue() ? 8 : 11);
|
|
|
|
break;
|
|
|
|
case "del":
|
2021-10-26 15:21:28 +02:00
|
|
|
case "strike":
|
|
|
|
case "strikethrough":
|
|
|
|
case "s":
|
2021-10-06 14:15:41 +02:00
|
|
|
this.run.setStrikeThrough(stringBooleanEntry.getValue());
|
|
|
|
break;
|
|
|
|
case "mark":
|
|
|
|
this.run.setTextHighlightColor(stringBooleanEntry.getValue() ? STHighlightColor.YELLOW.toString() : STHighlightColor.NONE.toString());
|
|
|
|
break;
|
|
|
|
case "sub":
|
|
|
|
this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUBSCRIPT : VerticalAlign.BASELINE);
|
|
|
|
break;
|
|
|
|
case "sup":
|
|
|
|
this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUPERSCRIPT : VerticalAlign.BASELINE);
|
|
|
|
break;
|
2021-10-26 15:21:28 +02:00
|
|
|
case "div":
|
|
|
|
case "p":
|
|
|
|
this.paragraph = this.paragraph.getDocument().createParagraph();
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
if (node.hasAttr("align")) {
|
|
|
|
String alignment = node.attr("align");
|
|
|
|
this.paragraph.setAlignment(ParagraphAlignment.valueOf(alignment.toUpperCase(Locale.ROOT)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "blockquote":
|
|
|
|
this.paragraph = this.paragraph.getDocument().createParagraph();
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
this.paragraph.setIndentationLeft(720);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "ul":
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
createNumbering(STNumberFormat.BULLET);
|
|
|
|
} else {
|
|
|
|
this.paragraph = this.paragraph.getDocument().createParagraph();
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE);
|
|
|
|
((ArrayDeque)this.abstractNumId).removeLast();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "ol":
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
createNumbering(STNumberFormat.DECIMAL);
|
|
|
|
} else {
|
|
|
|
this.paragraph = this.paragraph.getDocument().createParagraph();
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE);
|
|
|
|
((ArrayDeque)this.abstractNumId).removeLast();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "li":
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
this.paragraph = this.paragraph.getDocument().createParagraph();
|
|
|
|
this.paragraph.setIndentationLeft(720 * numberingLevel.intValue());
|
|
|
|
this.run = this.paragraph.createRun();
|
|
|
|
this.paragraph.setNumID(((ArrayDeque<BigInteger>)abstractNumId).getLast());
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "font":
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
if (node.hasAttr("color")) {
|
|
|
|
this.run.setColor(node.attr("color").substring(1));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
this.run.setColor("000000");
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "a":
|
|
|
|
if (stringBooleanEntry.getValue()) {
|
|
|
|
if (node.hasAttr("href")) {
|
|
|
|
this.run = createHyperLinkRun(node.attr("href"));
|
|
|
|
this.run.setColor("0000FF");
|
|
|
|
this.run.setUnderline(UnderlinePatterns.SINGLE);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
this.run = paragraph.createRun();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "br":
|
|
|
|
this.run.addCarriageReturn();
|
|
|
|
break;
|
2021-10-06 14:15:41 +02:00
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public void tail(Node node, int i) {
|
|
|
|
String name = node.nodeName();
|
|
|
|
properties.put(name, false);
|
2021-10-26 15:21:28 +02:00
|
|
|
parseProperties(node);
|
2021-10-13 16:47:50 +02:00
|
|
|
properties.clear();
|
2021-10-06 14:15:41 +02:00
|
|
|
}
|
2021-10-26 15:21:28 +02:00
|
|
|
|
|
|
|
//GK: This function creates one numbering.xml for the word document and adds a specific format.
|
|
|
|
//It imitates the numbering.xml that is usually generated by word editors like LibreOffice
|
|
|
|
private void createNumbering(STNumberFormat.Enum format) {
|
|
|
|
CTAbstractNum ctAbstractNum = CTAbstractNum.Factory.newInstance();
|
|
|
|
if (this.numbering == null) this.numbering = this.paragraph.getDocument().createNumbering();
|
|
|
|
BigInteger tempNumId = BigInteger.ONE;
|
|
|
|
boolean found = false;
|
|
|
|
while (!found) {
|
|
|
|
Object o = numbering.getAbstractNum(tempNumId);
|
|
|
|
found = (o == null);
|
|
|
|
if (!found) tempNumId = tempNumId.add(BigInteger.ONE);
|
|
|
|
}
|
|
|
|
ctAbstractNum.setAbstractNumId(tempNumId);
|
|
|
|
CTLvl ctLvl = ctAbstractNum.addNewLvl();
|
|
|
|
this.numberingLevel = numberingLevel.add(BigInteger.ONE);
|
|
|
|
ctLvl.setIlvl(numberingLevel);
|
|
|
|
ctLvl.addNewNumFmt().setVal(format);
|
|
|
|
ctLvl.addNewStart().setVal(BigInteger.ONE);
|
|
|
|
if (format == STNumberFormat.BULLET) {
|
|
|
|
ctLvl.addNewLvlText().setVal("\u2022");
|
|
|
|
} else {
|
|
|
|
ctLvl.addNewLvlText().setVal("%1.");
|
|
|
|
}
|
|
|
|
XWPFAbstractNum xwpfAbstractNum = new XWPFAbstractNum(ctAbstractNum);
|
|
|
|
|
|
|
|
this.abstractNumId.add(this.numbering.addAbstractNum(xwpfAbstractNum));
|
|
|
|
this.numbering.addNum(((ArrayDeque<BigInteger>)abstractNumId).getLast());
|
|
|
|
}
|
|
|
|
|
|
|
|
private XWPFHyperlinkRun createHyperLinkRun(String uri) {
|
|
|
|
String rId = this.paragraph.getDocument().getPackagePart().addExternalRelationship(uri, XWPFRelation.HYPERLINK.getRelation()).getId();
|
|
|
|
|
|
|
|
CTHyperlink cthyperLink=paragraph.getCTP().addNewHyperlink();
|
|
|
|
cthyperLink.setId(rId);
|
|
|
|
cthyperLink.addNewR();
|
|
|
|
|
|
|
|
return new XWPFHyperlinkRun(
|
|
|
|
cthyperLink,
|
|
|
|
cthyperLink.getRArray(0),
|
|
|
|
paragraph
|
|
|
|
);
|
|
|
|
}
|
2021-10-06 14:15:41 +02:00
|
|
|
}
|