diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java index 9484297d0..292e1c322 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java @@ -5,6 +5,7 @@ import java.util.Iterator; import java.util.Queue; import java.util.concurrent.PriorityBlockingQueue; +import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.Element; @@ -46,13 +47,14 @@ public class OsfPreprintsIterator implements Iterator { private void initQueue() { this.currentUrl = this.baseUrl + "?filter:is_published:d=true&format=json&page[size]=" + this.pageSize; + log.info("REST calls starting with {}", this.currentUrl); } @Override public boolean hasNext() { synchronized (this.recordQueue) { - while (this.recordQueue.isEmpty() && !this.currentUrl.isEmpty()) { + while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) && this.currentUrl.startsWith("http")) { try { this.currentUrl = downloadPage(this.currentUrl); } catch (final CollectorException e) { @@ -88,12 +90,18 @@ public class OsfPreprintsIterator implements Iterator { group.addElement("preprint").add(n); for (final Object o1 : n.selectNodes(".//contributors//href")) { - final Document doc1 = downloadUrl(((Node) o1).getText(), 0); - group.addElement("contributors").add(doc1.getRootElement().detach()); + final String href = ((Node) o1).getText(); + if (StringUtils.isNotBlank(href) && href.startsWith("http")) { + final Document doc1 = downloadUrl(href, 0); + group.addElement("contributors").add(doc1.getRootElement().detach()); + } } for (final Object o1 : n.selectNodes(".//primary_file//href")) { - final Document doc1 = downloadUrl(((Node) o1).getText(), 0); - group.addElement("primary_file").add(doc1.getRootElement().detach()); + final String href = ((Node) o1).getText(); + if (StringUtils.isNotBlank(href) && href.startsWith("http")) { + final Document doc1 = downloadUrl(href, 0); + group.addElement("primary_file").add(doc1.getRootElement().detach()); + } } this.recordQueue.add(DocumentHelper.createDocument(group).asXML()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java index efba0c72e..fe2274c89 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java @@ -18,6 +18,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.HttpClientParams; @@ -110,4 +111,10 @@ public class OsfPreprintsCollectorPluginTest { } + @Test + void testXML() { + final String xml = JsonUtils.convertToXML("{'next':null}"); + System.out.println(xml); + } + }