When exporting HTML-styled text to Word/PDF, the text is now properly indented and line-break (newline) characters are honored

This commit is contained in:
George Kalampokis 2021-11-01 11:39:43 +02:00
parent 5a66754cbd
commit 0010ad846a
2 changed files with 33 additions and 26 deletions

View File

@ -2,10 +2,8 @@ package eu.eudat.logic.utilities.documents.word;
import org.apache.poi.xwpf.usermodel.*; import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node; import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode; import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor; import org.jsoup.select.NodeVisitor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*; import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
@ -19,36 +17,37 @@ public class HtmlToWorldBuilder implements NodeVisitor {
private XWPFParagraph paragraph; private XWPFParagraph paragraph;
private XWPFRun run; private XWPFRun run;
private Boolean dumpRun; private Boolean dumpRun;
private final String identation; private final float indentation;
private Boolean isIdentationUsed; private Boolean isIdentationUsed;
private XWPFNumbering numbering; private XWPFNumbering numbering;
private Queue<BigInteger> abstractNumId; private Queue<BigInteger> abstractNumId;
private BigInteger numberingLevel; private BigInteger numberingLevel;
public static void convert(XWPFDocument document, Document htmlDocument, String identation) { public static HtmlToWorldBuilder convert(XWPFDocument document, Document htmlDocument, float indentation) {
XWPFParagraph paragraph = document.createParagraph(); XWPFParagraph paragraph = document.createParagraph();
HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, identation); HtmlToWorldBuilder htmlToWorldBuilder = new HtmlToWorldBuilder(paragraph, indentation);
NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument); NodeTraversor.traverse(htmlToWorldBuilder, htmlDocument);
return htmlToWorldBuilder;
} }
public HtmlToWorldBuilder(XWPFParagraph paragraph, String identation) { public HtmlToWorldBuilder(XWPFParagraph paragraph, float indentation) {
this.paragraph = paragraph; this.paragraph = paragraph;
this.run = this.paragraph.createRun(); this.run = this.paragraph.createRun();
this.dumpRun = false; this.dumpRun = false;
this.identation = identation; this.indentation = indentation;
this.isIdentationUsed = false; this.isIdentationUsed = false;
this.run.setFontSize(11); this.run.setFontSize(11);
this.abstractNumId = new ArrayDeque<>(); this.abstractNumId = new ArrayDeque<>();
this.numberingLevel = BigInteger.valueOf(-1); this.numberingLevel = BigInteger.valueOf(-1);
this.setDefaultIndentation();
} }
@Override @Override
public void head(Node node, int i) { public void head(Node node, int i) {
String name = node.nodeName(); String name = node.nodeName();
if (name.equals("#text")) { if (name.equals("#text")) {
String text = !isIdentationUsed ? identation + ((TextNode)node).text() : ((TextNode)node).text(); String text = ((TextNode)node).text();
this.run.setText(text); this.run.setText(text);
this.isIdentationUsed = true;
this.dumpRun = true; this.dumpRun = true;
} else { } else {
properties.put(name, true); properties.put(name, true);
@ -99,6 +98,8 @@ public class HtmlToWorldBuilder implements NodeVisitor {
case "p": case "p":
this.paragraph = this.paragraph.getDocument().createParagraph(); this.paragraph = this.paragraph.getDocument().createParagraph();
this.run = this.paragraph.createRun(); this.run = this.paragraph.createRun();
this.isIdentationUsed = false;
this.setDefaultIndentation();
if (stringBooleanEntry.getValue()) { if (stringBooleanEntry.getValue()) {
if (node.hasAttr("align")) { if (node.hasAttr("align")) {
String alignment = node.attr("align"); String alignment = node.attr("align");
@ -111,6 +112,9 @@ public class HtmlToWorldBuilder implements NodeVisitor {
this.run = this.paragraph.createRun(); this.run = this.paragraph.createRun();
if (stringBooleanEntry.getValue()) { if (stringBooleanEntry.getValue()) {
this.paragraph.setIndentationLeft(720); this.paragraph.setIndentationLeft(720);
} else {
this.isIdentationUsed = false;
this.setDefaultIndentation();
} }
break; break;
case "ul": case "ul":
@ -119,6 +123,8 @@ public class HtmlToWorldBuilder implements NodeVisitor {
} else { } else {
this.paragraph = this.paragraph.getDocument().createParagraph(); this.paragraph = this.paragraph.getDocument().createParagraph();
this.run = this.paragraph.createRun(); this.run = this.paragraph.createRun();
this.isIdentationUsed = false;
this.setDefaultIndentation();
this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE); this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE);
((ArrayDeque)this.abstractNumId).removeLast(); ((ArrayDeque)this.abstractNumId).removeLast();
} }
@ -129,6 +135,8 @@ public class HtmlToWorldBuilder implements NodeVisitor {
} else { } else {
this.paragraph = this.paragraph.getDocument().createParagraph(); this.paragraph = this.paragraph.getDocument().createParagraph();
this.run = this.paragraph.createRun(); this.run = this.paragraph.createRun();
this.isIdentationUsed = false;
this.setDefaultIndentation();
this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE); this.numberingLevel = this.numberingLevel.subtract(BigInteger.ONE);
((ArrayDeque)this.abstractNumId).removeLast(); ((ArrayDeque)this.abstractNumId).removeLast();
} }
@ -136,7 +144,7 @@ public class HtmlToWorldBuilder implements NodeVisitor {
case "li": case "li":
if (stringBooleanEntry.getValue()) { if (stringBooleanEntry.getValue()) {
this.paragraph = this.paragraph.getDocument().createParagraph(); this.paragraph = this.paragraph.getDocument().createParagraph();
this.paragraph.setIndentationLeft(720 * numberingLevel.intValue()); this.paragraph.setIndentationLeft(Math.round(indentation * 720) * (numberingLevel.intValue() + 1));
this.run = this.paragraph.createRun(); this.run = this.paragraph.createRun();
this.paragraph.setNumID(((ArrayDeque<BigInteger>)abstractNumId).getLast()); this.paragraph.setNumID(((ArrayDeque<BigInteger>)abstractNumId).getLast());
} }
@ -218,4 +226,15 @@ public class HtmlToWorldBuilder implements NodeVisitor {
paragraph paragraph
); );
} }
private void setDefaultIndentation() {
if (!isIdentationUsed) {
this.paragraph.setIndentationLeft(Math.round(indentation * 720.0F));
this.isIdentationUsed = true;
}
}
public XWPFParagraph getParagraph() {
return paragraph;
}
} }

View File

@ -51,22 +51,10 @@ public class WordBuilder {
return paragraph; return paragraph;
}); });
this.options.put(ParagraphStyle.HTML, (mainDocumentPart, item) -> { this.options.put(ParagraphStyle.HTML, (mainDocumentPart, item) -> {
/*try { Document htmlDoc = Jsoup.parse(item.replaceAll("\n", "<br>"));
XWPFHtmlDocument xwpfHtmlDocument = XWPFHtmlDocument.addHtmlDocument(mainDocumentPart); HtmlToWorldBuilder htmlToWorldBuilder = HtmlToWorldBuilder.convert(mainDocumentPart, htmlDoc, 0.5F);
if (item != null) { htmlToWorldBuilder.getParagraph().setIndentationLeft(0);
xwpfHtmlDocument.setHtml(item); return htmlToWorldBuilder.getParagraph();
}
mainDocumentPart.getDocument().getBody().addNewAltChunk().setId(xwpfHtmlDocument.getId());
} catch (InvalidFormatException e) {
logger.error(e.getLocalizedMessage(), e);
}
return null;*/
XWPFParagraph paragraph = null;
Document htmlDoc = Jsoup.parse(item);
HtmlToWorldBuilder.convert(mainDocumentPart, htmlDoc, " ");
return paragraph;
}); });
this.options.put(ParagraphStyle.TITLE, (mainDocumentPart, item) -> { this.options.put(ParagraphStyle.TITLE, (mainDocumentPart, item) -> {
XWPFParagraph paragraph = mainDocumentPart.createParagraph(); XWPFParagraph paragraph = mainDocumentPart.createParagraph();