package eu.eudat.logic.utilities.documents.word; import org.apache.poi.xwpf.usermodel.*; import org.jsoup.nodes.Document; import org.jsoup.nodes.Node; import org.jsoup.nodes.TextNode; import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; import org.openxmlformats.schemas.wordprocessingml.x2006.main.*; import java.math.BigInteger; import java.util.*; public class HtmlToWorldBuilder implements NodeVisitor { private final Map properties = new LinkedHashMap<>(); private XWPFParagraph paragraph; private XWPFRun run; private Boolean dumpRun; private final float indentation; private Boolean isIdentationUsed; private XWPFNumbering numbering; private Queue abstractNumId; private BigInteger numberingLevel; public static HtmlToWorldBuilder convert(XWPFDocument document, Document htmlDocument, float indentation) { XWPFParagraph paragraph = document.createParagraph(); HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, indentation); NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument); return htmlToWorldBuilder; } public HtmlToWorldBuilder(XWPFParagraph paragraph, float indentation) { this.paragraph = paragraph; this.run = this.paragraph.createRun(); this.dumpRun = false; this.indentation = indentation; this.isIdentationUsed = false; this.run.setFontSize(11); this.abstractNumId = new ArrayDeque<>(); this.numberingLevel = BigInteger.valueOf(-1); this.setDefaultIndentation(); } @Override public void head(Node node, int i) { String name = node.nodeName(); if (name.equals("#text")) { String text = ((TextNode)node).text(); this.run.setText(text); this.dumpRun = true; } else { properties.put(name, true); } if (dumpRun) { this.run = this.paragraph.createRun(); this.run.setFontSize(11); this.dumpRun = false; } parseProperties(node); properties.clear(); } private void parseProperties(Node node) { properties.entrySet().forEach(stringBooleanEntry -> { switch (stringBooleanEntry.getKey()) { case "i" : case "em": this.run.setItalic(stringBooleanEntry.getValue()); break; case "b": case "strong": this.run.setBold(stringBooleanEntry.getValue()); break; case "u": case "ins": this.run.setUnderline(stringBooleanEntry.getValue() ? UnderlinePatterns.SINGLE : UnderlinePatterns.NONE); break; case "small": this.run.setFontSize(stringBooleanEntry.getValue() ? 8 : 11); break; case "del": case "strike": case "strikethrough": case "s": this.run.setStrikeThrough(stringBooleanEntry.getValue()); break; case "mark": this.run.setTextHighlightColor(stringBooleanEntry.getValue() ? STHighlightColor.YELLOW.toString() : STHighlightColor.NONE.toString()); break; case "sub": this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUBSCRIPT : VerticalAlign.BASELINE); break; case "sup": this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUPERSCRIPT : VerticalAlign.BASELINE); break; case "div": case "p": this.paragraph = this.paragraph.getDocument().createParagraph(); this.run = this.paragraph.createRun(); this.isIdentationUsed = false; this.setDefaultIndentation(); if (stringBooleanEntry.getValue()) { if (node.hasAttr("align")) { String alignment = node.attr("align"); this.paragraph.setAlignment(ParagraphAlignment.valueOf(alignment.toUpperCase(Locale.ROOT))); } } break; case "blockquote": this.paragraph = this.paragraph.getDocument().createParagraph(); this.run = this.paragraph.createRun(); if (stringBooleanEntry.getValue()) { this.paragraph.setIndentationLeft(720); } else { this.isIdentationUsed = false; this.setDefaultIndentation(); } break; case "ul": if (stringBooleanEntry.getValue()) { createNumbering(STNumberFormat.BULLET); } else { this.paragraph = this.paragraph.getDocument().createParagraph(); this.run = this.paragraph.createRun(); this.isIdentationUsed = false; this.setDefaultIndentation(); this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE); ((ArrayDeque)this.abstractNumId).removeLast(); } break; case "ol": if (stringBooleanEntry.getValue()) { createNumbering(STNumberFormat.DECIMAL); } else { this.paragraph = this.paragraph.getDocument().createParagraph(); this.run = this.paragraph.createRun(); this.isIdentationUsed = false; this.setDefaultIndentation(); this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE); ((ArrayDeque)this.abstractNumId).removeLast(); } break; case "li": if (stringBooleanEntry.getValue()) { this.paragraph = this.paragraph.getDocument().createParagraph(); this.paragraph.setIndentationLeft(Math.round(indentation * 720) * (numberingLevel.intValue() + 1)); this.run = this.paragraph.createRun(); this.paragraph.setNumID(((ArrayDeque)abstractNumId).getLast()); } break; case "font": if (stringBooleanEntry.getValue()) { if (node.hasAttr("color")) { this.run.setColor(node.attr("color").substring(1)); } } else { this.run.setColor("000000"); } break; case "a": if (stringBooleanEntry.getValue()) { if (node.hasAttr("href")) { this.run = createHyperLinkRun(node.attr("href")); this.run.setColor("0000FF"); this.run.setUnderline(UnderlinePatterns.SINGLE); } } else { this.run = paragraph.createRun(); } break; case "br": if (stringBooleanEntry.getValue()) { this.run.addBreak(); } break; } }); } @Override public void tail(Node node, int i) { String name = node.nodeName(); properties.put(name, false); parseProperties(node); properties.clear(); } //GK: This function creates one numbering.xml for the word document and adds a specific format. //It imitates the numbering.xml that is usually generated by word editors like LibreOffice private void createNumbering(STNumberFormat.Enum format) { CTAbstractNum ctAbstractNum = CTAbstractNum.Factory.newInstance(); if (this.numbering == null) this.numbering = this.paragraph.getDocument().createNumbering(); BigInteger tempNumId = BigInteger.ONE; boolean found = false; while (!found) { Object o = numbering.getAbstractNum(tempNumId); found = (o == null); if (!found) tempNumId = tempNumId.add(BigInteger.ONE); } ctAbstractNum.setAbstractNumId(tempNumId); CTLvl ctLvl = ctAbstractNum.addNewLvl(); this.numberingLevel = numberingLevel.add(BigInteger.ONE); ctLvl.setIlvl(numberingLevel); ctLvl.addNewNumFmt().setVal(format); ctLvl.addNewStart().setVal(BigInteger.ONE); if (format == STNumberFormat.BULLET) { ctLvl.addNewLvlJc().setVal(STJc.LEFT); ctLvl.addNewLvlText().setVal("\u2022"); ctLvl.addNewRPr(); //Set the Symbol font CTFonts f = ctLvl.getRPr().addNewRFonts(); f.setAscii("Symbol"); f.setHAnsi("Symbol"); f.setCs("Symbol"); } else { ctLvl.addNewLvlText().setVal("%1."); } XWPFAbstractNum xwpfAbstractNum = new XWPFAbstractNum(ctAbstractNum); this.abstractNumId.add(this.numbering.addAbstractNum(xwpfAbstractNum)); this.numbering.addNum(((ArrayDeque)abstractNumId).getLast()); } private XWPFHyperlinkRun createHyperLinkRun(String uri) { String rId = this.paragraph.getDocument().getPackagePart().addExternalRelationship(uri, XWPFRelation.HYPERLINK.getRelation()).getId(); CTHyperlink cthyperLink=paragraph.getCTP().addNewHyperlink(); cthyperLink.setId(rId); cthyperLink.addNewR(); return new XWPFHyperlinkRun( cthyperLink, cthyperLink.getRArray(0), paragraph ); } private void setDefaultIndentation() { if (!isIdentationUsed) { this.paragraph.setIndentationLeft(Math.round(indentation * 720.0F)); this.isIdentationUsed = true; } } public XWPFParagraph getParagraph() { return paragraph; } }