diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java index 579ef6127..80881acba 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIterator.java @@ -27,7 +27,7 @@ public class BaseCollectorIterator implements Iterator { private Object nextElement; - private final BlockingQueue queue = new LinkedBlockingQueue<>(); + private final BlockingQueue queue = new LinkedBlockingQueue<>(20); private static final Logger log = LoggerFactory.getLogger(BaseCollectorIterator.class); @@ -78,8 +78,6 @@ public class BaseCollectorIterator implements Iterator { log.error("Error processing BASE records", e); report.put(e.getClass().getName(), e.getMessage()); throw new RuntimeException("Error processing BASE records", e); - } finally { - this.queue.add("__END__"); // I ADD A NOT ELEMENT OBJECT TO INDICATE THE END OF THE QUEUE } } @@ -91,8 +89,6 @@ public class BaseCollectorIterator implements Iterator { log.error("Error processing BASE records", e); report.put(e.getClass().getName(), e.getMessage()); throw new RuntimeException("Error processing BASE records", e); - } finally { - this.queue.add("__END__"); // I ADD A NOT ELEMENT OBJECT TO INDICATE THE END OF THE QUEUE } } @@ -119,13 +115,16 @@ public class BaseCollectorIterator implements Iterator { for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) { if (o instanceof Element) { - this.queue.add(o); + this.queue.put(((Element) o).detach()); count++; } } } } } + + this.queue.put("__END__"); // I ADD A NOT ELEMENT OBJECT TO INDICATE THE END OF THE QUEUE + log.info("Total records (written in queue): " + count); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java index dfefa082f..2e4cfdd0b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/base/BaseCollectorIteratorTest.java @@ -2,6 +2,11 @@ package eu.dnetlib.dhp.collection.plugin.base; import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.lang3.StringUtils; +import org.dom4j.Attribute; import org.dom4j.Element; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -19,11 +24,37 @@ public class BaseCollectorIteratorTest { final BaseCollectorIterator iterator = new BaseCollectorIterator("base-sample.tar", new AggregatorReport()); + final Map> collections = new HashMap<>(); + while (iterator.hasNext()) { final Element record = iterator.next(); - // System.out.println(record.asXML()); + count++; + + if ((count % 1000) == 0) { + System.out.println("# Read records: " + count); + } + + for (final Object o : record.selectNodes("//*[local-name() = 'collection']")) { + final Element n = (Element) o; + final String collName = n.getText().trim(); + if (StringUtils.isNotBlank(collName) && !collections.containsKey(collName)) { + final Map collAttrs = new HashMap<>(); + + for (final Object ao : n.attributes()) { + collAttrs.put(((Attribute) ao).getName(), ((Attribute) ao).getValue()); + } + + collections.put(collName, collAttrs); + } + } } + + collections.forEach((k, v) -> { + System.out.println(k); + v.forEach((ak, av) -> System.out.println(" - " + ak + "=" + av)); + }); + assertEquals(30000, count); }