diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java index 1ed1059f7..21321dc27 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java @@ -23,7 +23,7 @@ import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; -public class BaseCollectorIterator implements Iterator { +public class BaseCollectorIterator implements Iterator { private Object nextElement; @@ -51,13 +51,13 @@ public class BaseCollectorIterator implements Iterator { @Override public synchronized boolean hasNext() { - return (this.nextElement != null) && (this.nextElement instanceof Element); + return (this.nextElement != null) && (this.nextElement instanceof Document); } @Override - public synchronized Element next() { + public synchronized Document next() { try { - return this.nextElement instanceof Element ? (Element) this.nextElement : null; + return this.nextElement instanceof Document ? (Document) this.nextElement : null; } finally { try { this.nextElement = this.queue.take(); @@ -113,7 +113,9 @@ public class BaseCollectorIterator implements Iterator { for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) { if (o instanceof Element) { - this.queue.put(((Element) o).detach()); + final Element newRoot = (Element) ((Element) o).detach(); + final Document newDoc = DocumentHelper.createDocument(newRoot); + this.queue.put(newDoc); count++; } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java index d80144932..7a3dccb5f 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorPlugin.java @@ -11,7 +11,7 @@ import java.util.stream.StreamSupport; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.dom4j.Element; +import org.dom4j.Document; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; import org.slf4j.Logger; @@ -51,14 +51,14 @@ public class BaseCollectorPlugin implements CollectorPlugin { throw new CollectorException(e); } - final Iterator iterator = new BaseCollectorIterator(this.fs, filePath, report); - final Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED); + final Iterator iterator = new BaseCollectorIterator(this.fs, filePath, report); + final Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED); return StreamSupport.stream(spliterator, false) - .filter(elem -> filterXml(elem, report)) - .map(elem -> xmlToString(report, elem)); + .filter(doc -> filterXml(doc, report)) + .map(doc -> xmlToString(doc, report)); } - private boolean filterXml(final Element elem, final AggregatorReport report) { + private boolean filterXml(final Document doc, final AggregatorReport report) { // TODO Auto-generated method stub // HERE THE FILTERS ACCORDING TO THE DOCUMENTATION @@ -66,10 +66,10 @@ public class BaseCollectorPlugin implements CollectorPlugin { return true; } - private String xmlToString(final AggregatorReport report, final Element elem) { + private String xmlToString(final Document doc, final AggregatorReport report) { try (final StringWriter sw = new StringWriter()) { final XMLWriter writer = new XMLWriter(sw, OutputFormat.createPrettyPrint()); - writer.write(elem); + writer.write(doc); return writer.toString(); } catch (final IOException e) { report.put(e.getClass().getName(), e.getMessage()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java index 2e4cfdd0b..195cebcf4 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java @@ -4,14 +4,18 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.util.HashMap; import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang3.StringUtils; import org.dom4j.Attribute; +import org.dom4j.Document; import org.dom4j.Element; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.junit.jupiter.MockitoExtension; +import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.common.aggregation.AggregatorReport; @ExtendWith(MockitoExtension.class) @@ -27,7 +31,7 @@ public class BaseCollectorIteratorTest { final Map> collections = new HashMap<>(); while (iterator.hasNext()) { - final Element record = iterator.next(); + final Document record = iterator.next(); count++; @@ -35,7 +39,10 @@ public class BaseCollectorIteratorTest { System.out.println("# Read records: " + count); } + // System.out.println(record.asXML()); + for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) { + final Element n = (Element) o; final String collName = n.getText().trim(); if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) { @@ -46,14 +53,16 @@ public class BaseCollectorIteratorTest { } collections.put(collName, collAttrs); + } } } - collections.forEach((k, v) -> { - System.out.println(k); - v.forEach((ak, av) -> System.out.println(" - " + ak + "=" + av)); - }); + final ObjectMapper mapper = new ObjectMapper(); + for (final Entry> e : collections.entrySet()) { + System.out.println(e.getKey() + ": " + mapper.writeValueAsString(e.getValue())); + + } assertEquals(30000, count); }