diff --git a/dmp-backend/web/src/main/java/eu/eudat/logic/utilities/documents/word/HtmlToWorldBuilder.java b/dmp-backend/web/src/main/java/eu/eudat/logic/utilities/documents/word/HtmlToWorldBuilder.java index ddcba3bca..6506b5418 100644 --- a/dmp-backend/web/src/main/java/eu/eudat/logic/utilities/documents/word/HtmlToWorldBuilder.java +++ b/dmp-backend/web/src/main/java/eu/eudat/logic/utilities/documents/word/HtmlToWorldBuilder.java @@ -8,33 +8,27 @@ import org.jsoup.nodes.TextNode; import org.jsoup.select.Elements; import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.STHighlightColor; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.*; -import java.util.LinkedHashMap; -import java.util.Map; +import java.math.BigInteger; +import java.util.*; public class HtmlToWorldBuilder implements NodeVisitor { private final Map properties = new LinkedHashMap<>(); - private final XWPFParagraph paragraph; + private XWPFParagraph paragraph; private XWPFRun run; private Boolean dumpRun; private final String identation; private Boolean isIdentationUsed; + private XWPFNumbering numbering; + private Queue abstractNumId; + private BigInteger numberingLevel; public static void convert(XWPFDocument document, Document htmlDocument, String identation) { - Elements htmlParagraphs = htmlDocument.select("p"); - if (!htmlParagraphs.isEmpty()) { - for (Element htmlParagraph : htmlParagraphs) { - XWPFParagraph paragraph = document.createParagraph(); - HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation); - NodeTraversor.traverse(htmlToWorldBuilder, htmlParagraph); - } - } else { - XWPFParagraph paragraph = document.createParagraph(); - HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation); - NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument); - } + XWPFParagraph paragraph = document.createParagraph(); + HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation); + NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument); } public HtmlToWorldBuilder(XWPFParagraph paragraph, String identation) { @@ -44,6 +38,8 @@ public class HtmlToWorldBuilder implements NodeVisitor { this.identation = identation; this.isIdentationUsed = false; this.run.setFontSize(11); + this.abstractNumId = new ArrayDeque<>(); + this.numberingLevel = BigInteger.valueOf(-1); } @Override @@ -62,11 +58,11 @@ public class HtmlToWorldBuilder implements NodeVisitor { this.run.setFontSize(11); this.dumpRun = false; } - parseProperties(); + parseProperties(node); properties.clear(); } - private void parseProperties() { + private void parseProperties(Node node) { properties.entrySet().forEach(stringBooleanEntry -> { switch (stringBooleanEntry.getKey()) { case "i" : @@ -85,6 +81,9 @@ public class HtmlToWorldBuilder implements NodeVisitor { this.run.setFontSize(stringBooleanEntry.getValue() ? 8 : 11); break; case "del": + case "strike": + case "strikethrough": + case "s": this.run.setStrikeThrough(stringBooleanEntry.getValue()); break; case "mark": @@ -96,6 +95,75 @@ public class HtmlToWorldBuilder implements NodeVisitor { case "sup": this.run.setSubscript(stringBooleanEntry.getValue() ? VerticalAlign.SUPERSCRIPT : VerticalAlign.BASELINE); break; + case "div": + case "p": + this.paragraph = this.paragraph.getDocument().createParagraph(); + this.run = this.paragraph.createRun(); + if (stringBooleanEntry.getValue()) { + if (node.hasAttr("align")) { + String alignment = node.attr("align"); + this.paragraph.setAlignment(ParagraphAlignment.valueOf(alignment.toUpperCase(Locale.ROOT))); + } + } + break; + case "blockquote": + this.paragraph = this.paragraph.getDocument().createParagraph(); + this.run = this.paragraph.createRun(); + if (stringBooleanEntry.getValue()) { + this.paragraph.setIndentationLeft(720); + } + break; + case "ul": + if (stringBooleanEntry.getValue()) { + createNumbering(STNumberFormat.BULLET); + } else { + this.paragraph = this.paragraph.getDocument().createParagraph(); + this.run = this.paragraph.createRun(); + this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE); + ((ArrayDeque)this.abstractNumId).removeLast(); + } + break; + case "ol": + if (stringBooleanEntry.getValue()) { + createNumbering(STNumberFormat.DECIMAL); + } else { + this.paragraph = this.paragraph.getDocument().createParagraph(); + this.run = this.paragraph.createRun(); + this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE); + ((ArrayDeque)this.abstractNumId).removeLast(); + } + break; + case "li": + if (stringBooleanEntry.getValue()) { + this.paragraph = this.paragraph.getDocument().createParagraph(); + this.paragraph.setIndentationLeft(720 * numberingLevel.intValue()); + this.run = this.paragraph.createRun(); + this.paragraph.setNumID(((ArrayDeque)abstractNumId).getLast()); + } + break; + case "font": + if (stringBooleanEntry.getValue()) { + if (node.hasAttr("color")) { + this.run.setColor(node.attr("color").substring(1)); + } + } else { + this.run.setColor("000000"); + } + break; + case "a": + if (stringBooleanEntry.getValue()) { + if (node.hasAttr("href")) { + this.run = createHyperLinkRun(node.attr("href")); + this.run.setColor("0000FF"); + this.run.setUnderline(UnderlinePatterns.SINGLE); + } + } else { + this.run = paragraph.createRun(); + } + break; + case "br": + this.run.addCarriageReturn(); + break; } }); } @@ -104,7 +172,50 @@ public class HtmlToWorldBuilder implements NodeVisitor { public void tail(Node node, int i) { String name = node.nodeName(); properties.put(name, false); - parseProperties(); + parseProperties(node); properties.clear(); } + + //GK: This function creates one numbering.xml for the word document and adds a specific format. + //It imitates the numbering.xml that is usually generated by word editors like LibreOffice + private void createNumbering(STNumberFormat.Enum format) { + CTAbstractNum ctAbstractNum = CTAbstractNum.Factory.newInstance(); + if (this.numbering == null) this.numbering = this.paragraph.getDocument().createNumbering(); + BigInteger tempNumId = BigInteger.ONE; + boolean found = false; + while (!found) { + Object o = numbering.getAbstractNum(tempNumId); + found = (o == null); + if (!found) tempNumId = tempNumId.add(BigInteger.ONE); + } + ctAbstractNum.setAbstractNumId(tempNumId); + CTLvl ctLvl = ctAbstractNum.addNewLvl(); + this.numberingLevel = numberingLevel.add(BigInteger.ONE); + ctLvl.setIlvl(numberingLevel); + ctLvl.addNewNumFmt().setVal(format); + ctLvl.addNewStart().setVal(BigInteger.ONE); + if (format == STNumberFormat.BULLET) { + ctLvl.addNewLvlText().setVal("\u2022"); + } else { + ctLvl.addNewLvlText().setVal("%1."); + } + XWPFAbstractNum xwpfAbstractNum = new XWPFAbstractNum(ctAbstractNum); + + this.abstractNumId.add(this.numbering.addAbstractNum(xwpfAbstractNum)); + this.numbering.addNum(((ArrayDeque)abstractNumId).getLast()); + } + + private XWPFHyperlinkRun createHyperLinkRun(String uri) { + String rId = this.paragraph.getDocument().getPackagePart().addExternalRelationship(uri, XWPFRelation.HYPERLINK.getRelation()).getId(); + + CTHyperlink cthyperLink=paragraph.getCTP().addNewHyperlink(); + cthyperLink.setId(rId); + cthyperLink.addNewR(); + + return new XWPFHyperlinkRun( + cthyperLink, + cthyperLink.getRArray(0), + paragraph + ); + } }