Fixed a bug with the 'null' string.

This commit is contained in:
Michele Artini 2024-09-24 15:18:54 +02:00
parent 2d7a7a962d
commit b35d046fd2
2 changed files with 20 additions and 5 deletions

View File

@ -5,6 +5,7 @@ import java.util.Iterator;
import java.util.Queue; import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.PriorityBlockingQueue;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentHelper; import org.dom4j.DocumentHelper;
import org.dom4j.Element; import org.dom4j.Element;
@ -46,13 +47,14 @@ public class OsfPreprintsIterator implements Iterator<String> {
private void initQueue() { private void initQueue() {
this.currentUrl = this.baseUrl + "?filter:is_published:d=true&format=json&page[size]=" + this.pageSize; this.currentUrl = this.baseUrl + "?filter:is_published:d=true&format=json&page[size]=" + this.pageSize;
log.info("REST calls starting with {}", this.currentUrl); log.info("REST calls starting with {}", this.currentUrl);
} }
@Override @Override
public boolean hasNext() { public boolean hasNext() {
synchronized (this.recordQueue) { synchronized (this.recordQueue) {
while (this.recordQueue.isEmpty() && !this.currentUrl.isEmpty()) { while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) && this.currentUrl.startsWith("http")) {
try { try {
this.currentUrl = downloadPage(this.currentUrl); this.currentUrl = downloadPage(this.currentUrl);
} catch (final CollectorException e) { } catch (final CollectorException e) {
@ -88,12 +90,18 @@ public class OsfPreprintsIterator implements Iterator<String> {
group.addElement("preprint").add(n); group.addElement("preprint").add(n);
for (final Object o1 : n.selectNodes(".//contributors//href")) { for (final Object o1 : n.selectNodes(".//contributors//href")) {
final Document doc1 = downloadUrl(((Node) o1).getText(), 0); final String href = ((Node) o1).getText();
group.addElement("contributors").add(doc1.getRootElement().detach()); if (StringUtils.isNotBlank(href) && href.startsWith("http")) {
final Document doc1 = downloadUrl(href, 0);
group.addElement("contributors").add(doc1.getRootElement().detach());
}
} }
for (final Object o1 : n.selectNodes(".//primary_file//href")) { for (final Object o1 : n.selectNodes(".//primary_file//href")) {
final Document doc1 = downloadUrl(((Node) o1).getText(), 0); final String href = ((Node) o1).getText();
group.addElement("primary_file").add(doc1.getRootElement().detach()); if (StringUtils.isNotBlank(href) && href.startsWith("http")) {
final Document doc1 = downloadUrl(href, 0);
group.addElement("primary_file").add(doc1.getRootElement().detach());
}
} }
this.recordQueue.add(DocumentHelper.createDocument(group).asXML()); this.recordQueue.add(DocumentHelper.createDocument(group).asXML());

View File

@ -18,6 +18,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException; import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams; import eu.dnetlib.dhp.common.collection.HttpClientParams;
@ -110,4 +111,10 @@ public class OsfPreprintsCollectorPluginTest {
} }
/**
 * Converts a JSON object whose {@code next} field is {@code null} to XML via
 * {@code JsonUtils.convertToXML} and prints the result to standard output.
 *
 * NOTE(review): the output is only printed, never asserted on, so this test
 * can only fail if the conversion itself throws.
 */
@Test
void testXML() {
System.out.println(JsonUtils.convertToXML("{'next':null}"));
}
} }