argos/dmp-backend/web/src/main/java/eu/eudat/logic/utilities/documents/word/HtmlToWorldBuilder.java

249 lines
10 KiB
Java

package eu.eudat.logic.utilities.documents.word;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import java.math.BigInteger;
import java.util.*;
/**
 * jsoup {@link NodeVisitor} that renders an HTML node tree into Apache POI XWPF paragraphs and runs.
 *
 * <p>The visitor keeps a "current" paragraph and a "current" run. Text nodes are written into the
 * current run; element nodes toggle formatting on the current run when they are entered
 * ({@link #head}) and left ({@link #tail}), or start new paragraphs / list items.
 *
 * <p>Instances are stateful and are meant to be used for a single traversal.
 *
 * <p>NOTE(review): every abstract numbering created for a list defines only the level equal to the
 * current nesting depth, while list paragraphs never set an explicit level themselves. Verify that
 * nested lists render as intended in the target word processors.
 */
public class HtmlToWorldBuilder implements NodeVisitor {

    /** Font size given to the initial run, to the run started after each text node, and restored after {@code <small>}. */
    private static final int DEFAULT_FONT_SIZE = 11;

    /** Font size used inside {@code <small>}. */
    private static final int SMALL_FONT_SIZE = 8;

    /** Value passed to {@code setIndentationLeft} for one indentation step. */
    private static final int INDENT_STEP = 720;

    /** Colour restored on the current run when a {@code <font>} element is closed. */
    private static final String DEFAULT_COLOR = "000000";

    /** Colour applied to hyperlink runs. */
    private static final String LINK_COLOR = "0000FF";

    private XWPFParagraph paragraph;
    private XWPFRun run;
    private final float indentation;
    private XWPFNumbering numbering;

    /** Concrete numbering ids (as returned by {@code XWPFNumbering.addNum}) of the lists currently open, innermost last. */
    private final Deque<BigInteger> numIds = new ArrayDeque<>();

    /** Zero-based nesting depth of the innermost open list; {@code -1} when no list is open. */
    private int numberingLevel = -1;

    /**
     * Appends a new paragraph to {@code document} and renders {@code htmlDocument} starting there.
     *
     * @param document     the Word document that receives the content
     * @param htmlDocument the parsed HTML to render
     * @param indentation  indentation multiplier; one unit equals {@value #INDENT_STEP}
     * @return the builder after the traversal, so callers can obtain the last paragraph written
     */
    public static HtmlToWorldBuilder convert(XWPFDocument document, Document htmlDocument, float indentation) {
        XWPFParagraph paragraph = document.createParagraph();
        HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, indentation);
        NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument);
        return htmlToWorldBuilder;
    }

    /**
     * Creates a builder that starts writing into {@code paragraph}.
     *
     * @param paragraph   the first paragraph to write into; a run is created in it immediately
     * @param indentation indentation multiplier applied to regular paragraphs and list items
     */
    public HtmlToWorldBuilder(XWPFParagraph paragraph, float indentation) {
        this.paragraph = paragraph;
        this.run = this.paragraph.createRun();
        this.indentation = indentation;
        this.run.setFontSize(DEFAULT_FONT_SIZE);
        this.applyDefaultIndentation();
    }

    /**
     * Called when a node is entered. Text nodes are written to the current run, after which a
     * fresh run is started; any other node is treated as an opening tag.
     */
    @Override
    public void head(Node node, int i) {
        String name = node.nodeName();
        if ("#text".equals(name)) {
            this.run.setText(((TextNode) node).text());
            // Continue in a fresh run so that formatting toggled by later tags
            // does not retroactively change the text that was just written.
            this.run = this.paragraph.createRun();
            this.run.setFontSize(DEFAULT_FONT_SIZE);
            return;
        }
        applyTag(node, name, true);
    }

    /** Called when a node is left; treats the node as a closing tag. */
    @Override
    public void tail(Node node, int i) {
        applyTag(node, node.nodeName(), false);
    }

    /**
     * Applies the effect of one tag to the current run / paragraph.
     *
     * @param node    the node being visited (used to read attributes)
     * @param tag     the node name
     * @param opening {@code true} when the tag is being entered, {@code false} when it is left
     */
    private void applyTag(Node node, String tag, boolean opening) {
        switch (tag) {
            case "i":
            case "em":
                this.run.setItalic(opening);
                break;
            case "b":
            case "strong":
                this.run.setBold(opening);
                break;
            case "u":
            case "ins":
                this.run.setUnderline(opening ? UnderlinePatterns.SINGLE : UnderlinePatterns.NONE);
                break;
            case "small":
                this.run.setFontSize(opening ? SMALL_FONT_SIZE : DEFAULT_FONT_SIZE);
                break;
            case "del":
            case "strike":
            case "strikethrough":
            case "s":
                this.run.setStrikeThrough(opening);
                break;
            case "mark":
                this.run.setTextHighlightColor(
                        opening ? STHighlightColor.YELLOW.toString() : STHighlightColor.NONE.toString());
                break;
            case "sub":
                this.run.setSubscript(opening ? VerticalAlign.SUBSCRIPT : VerticalAlign.BASELINE);
                break;
            case "sup":
                this.run.setSubscript(opening ? VerticalAlign.SUPERSCRIPT : VerticalAlign.BASELINE);
                break;
            case "div":
            case "p":
                // A new paragraph is started both when the block opens and when it closes.
                startParagraph();
                applyDefaultIndentation();
                if (opening && node.hasAttr("align")) {
                    ParagraphAlignment alignment = parseAlignment(node.attr("align"));
                    if (alignment != null) {
                        this.paragraph.setAlignment(alignment);
                    }
                }
                break;
            case "blockquote":
                startParagraph();
                if (opening) {
                    this.paragraph.setIndentationLeft(INDENT_STEP);
                } else {
                    applyDefaultIndentation();
                }
                break;
            case "ul":
                if (opening) {
                    createNumbering(STNumberFormat.BULLET);
                } else {
                    closeList();
                }
                break;
            case "ol":
                if (opening) {
                    createNumbering(STNumberFormat.DECIMAL);
                } else {
                    closeList();
                }
                break;
            case "li":
                if (opening) {
                    startParagraph();
                    this.paragraph.setIndentationLeft(
                            Math.round(indentation * INDENT_STEP) * (numberingLevel + 1));
                    // A stray <li> outside any list has no numbering to attach to;
                    // render it as a plain paragraph instead of failing.
                    BigInteger numId = this.numIds.peekLast();
                    if (numId != null) {
                        this.paragraph.setNumID(numId);
                    }
                }
                break;
            case "font":
                if (opening) {
                    if (node.hasAttr("color")) {
                        String hex = toHexColor(node.attr("color"));
                        if (hex != null) {
                            this.run.setColor(hex);
                        }
                    }
                } else {
                    this.run.setColor(DEFAULT_COLOR);
                }
                break;
            case "a":
                if (opening) {
                    if (node.hasAttr("href")) {
                        XWPFHyperlinkRun link = createHyperLinkRun(node.attr("href"));
                        if (link != null) {
                            this.run = link;
                            this.run.setColor(LINK_COLOR);
                            this.run.setUnderline(UnderlinePatterns.SINGLE);
                        }
                    }
                } else {
                    this.run = this.paragraph.createRun();
                }
                break;
            case "br":
                if (opening) {
                    this.run.addBreak();
                }
                break;
            default:
                // Unknown tags (and "#text" on the way out) have no effect.
                break;
        }
    }

    /** Appends a new paragraph to the document and makes it, and a new run inside it, current. */
    private void startParagraph() {
        this.paragraph = this.paragraph.getDocument().createParagraph();
        this.run = this.paragraph.createRun();
    }

    /** Ends the innermost list: starts a regular paragraph and forgets the list's numbering id. */
    private void closeList() {
        startParagraph();
        applyDefaultIndentation();
        this.numberingLevel--;
        this.numIds.pollLast();
    }

    /**
     * Translates an HTML {@code align} attribute value into a POI alignment.
     *
     * @return the alignment, or {@code null} when the value is not recognised
     */
    private static ParagraphAlignment parseAlignment(String value) {
        String normalized = value.trim().toUpperCase(Locale.ROOT);
        if ("JUSTIFY".equals(normalized)) {
            // HTML says "justify"; the corresponding POI constant is BOTH.
            return ParagraphAlignment.BOTH;
        }
        for (ParagraphAlignment candidate : ParagraphAlignment.values()) {
            if (candidate.name().equals(normalized)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Normalises an HTML colour attribute to six hexadecimal digits without a leading {@code #}.
     * Accepts {@code #RRGGBB}, {@code RRGGBB}, {@code #RGB} and {@code RGB}.
     *
     * @return the six-digit value, or {@code null} when the input is in none of those forms
     *         (for example a colour name)
     */
    private static String toHexColor(String value) {
        String hex = value.trim();
        if (hex.startsWith("#")) {
            hex = hex.substring(1);
        }
        if (hex.length() == 3) {
            StringBuilder expanded = new StringBuilder(6);
            for (int idx = 0; idx < 3; idx++) {
                expanded.append(hex.charAt(idx)).append(hex.charAt(idx));
            }
            hex = expanded.toString();
        }
        if (hex.length() != 6) {
            return null;
        }
        for (int idx = 0; idx < hex.length(); idx++) {
            char c = hex.charAt(idx);
            boolean isHexDigit = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
            if (!isHexDigit) {
                return null;
            }
        }
        return hex;
    }

    /**
     * GK: This function creates one numbering.xml for the word document and adds a specific format.
     * It imitates the numbering.xml that is usually generated by word editors like LibreOffice.
     *
     * <p>Opens one more list level: registers a new abstract numbering with the given format under
     * the first unused abstract id, adds a concrete numbering that refers to it, and remembers the
     * concrete numbering id for the list items that follow.
     *
     * @param format bullet or decimal numbering
     */
    private void createNumbering(STNumberFormat.Enum format) {
        if (this.numbering == null) {
            this.numbering = this.paragraph.getDocument().createNumbering();
        }

        // Find the first abstract numbering id that is not taken yet.
        BigInteger freeAbstractId = BigInteger.ONE;
        while (this.numbering.getAbstractNum(freeAbstractId) != null) {
            freeAbstractId = freeAbstractId.add(BigInteger.ONE);
        }

        CTAbstractNum ctAbstractNum = CTAbstractNum.Factory.newInstance();
        ctAbstractNum.setAbstractNumId(freeAbstractId);

        this.numberingLevel++;
        CTLvl ctLvl = ctAbstractNum.addNewLvl();
        ctLvl.setIlvl(BigInteger.valueOf(this.numberingLevel));
        ctLvl.addNewNumFmt().setVal(format);
        ctLvl.addNewStart().setVal(BigInteger.ONE);
        if (format == STNumberFormat.BULLET) {
            ctLvl.addNewLvlJc().setVal(STJc.LEFT);
            ctLvl.addNewLvlText().setVal("\u2022");
            ctLvl.addNewRPr(); //Set the Symbol font
            CTFonts f = ctLvl.getRPr().addNewRFonts();
            f.setAscii("Symbol");
            f.setHAnsi("Symbol");
            f.setCs("Symbol");
        } else {
            ctLvl.addNewLvlText().setVal("%1.");
        }

        BigInteger abstractId = this.numbering.addAbstractNum(new XWPFAbstractNum(ctAbstractNum));
        // Paragraphs must reference the concrete numbering id returned by addNum, which is not
        // guaranteed to equal the abstract id (e.g. when the document already contains numbering).
        this.numIds.addLast(this.numbering.addNum(abstractId));
    }

    /**
     * Creates a hyperlink run in the current paragraph that points to {@code uri}.
     *
     * @return the new run, or {@code null} when {@code uri} is rejected as an external
     *         relationship target, in which case nothing is added to the paragraph
     */
    private XWPFHyperlinkRun createHyperLinkRun(String uri) {
        String rId;
        try {
            rId = this.paragraph.getDocument().getPackagePart()
                    .addExternalRelationship(uri, XWPFRelation.HYPERLINK.getRelation()).getId();
        } catch (IllegalArgumentException invalidTarget) {
            // The href could not be used as a relationship target (e.g. it is not a parsable URI).
            // Keep the link text but drop the link rather than aborting the whole conversion.
            return null;
        }
        CTHyperlink ctHyperlink = this.paragraph.getCTP().addNewHyperlink();
        ctHyperlink.setId(rId);
        ctHyperlink.addNewR();
        return new XWPFHyperlinkRun(ctHyperlink, ctHyperlink.getRArray(0), this.paragraph);
    }

    /** Gives the current paragraph the base left indentation derived from {@link #indentation}. */
    private void applyDefaultIndentation() {
        this.paragraph.setIndentationLeft(Math.round(indentation * (float) INDENT_STEP));
    }

    /**
     * @return the paragraph currently being written to, i.e. the last one created by this builder
     *         (or the one passed to the constructor if none was created)
     */
    public XWPFParagraph getParagraph() {
        return paragraph;
    }
}