partial implementation of osfPreprints plugin
This commit is contained in:
parent
bfd05cdab2
commit
dcf09811a2
|
@ -26,6 +26,7 @@ import eu.dnetlib.dhp.collection.plugin.gtr2.Gtr2PublicationsCollectorPlugin;
|
|||
import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.osf.OsfPreprintsCollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
|
@ -45,11 +46,11 @@ public class CollectorWorker extends ReportingJob {
|
|||
private final HttpClientParams clientParams;
|
||||
|
||||
public CollectorWorker(
|
||||
final ApiDescriptor api,
|
||||
final FileSystem fileSystem,
|
||||
final MDStoreVersion mdStoreVersion,
|
||||
final HttpClientParams clientParams,
|
||||
final AggregatorReport report) {
|
||||
final ApiDescriptor api,
|
||||
final FileSystem fileSystem,
|
||||
final MDStoreVersion mdStoreVersion,
|
||||
final HttpClientParams clientParams,
|
||||
final AggregatorReport report) {
|
||||
super(report);
|
||||
this.api = api;
|
||||
this.fileSystem = fileSystem;
|
||||
|
@ -68,25 +69,22 @@ public class CollectorWorker extends ReportingJob {
|
|||
scheduleReport(counter);
|
||||
|
||||
try (SequenceFile.Writer writer = SequenceFile
|
||||
.createWriter(
|
||||
this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
||||
.keyClass(IntWritable.class),
|
||||
SequenceFile.Writer
|
||||
.valueClass(Text.class),
|
||||
SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
||||
.createWriter(this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer
|
||||
.keyClass(IntWritable.class), SequenceFile.Writer
|
||||
.valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) {
|
||||
final IntWritable key = new IntWritable(counter.get());
|
||||
final Text value = new Text();
|
||||
plugin
|
||||
.collect(this.api, this.report)
|
||||
.forEach(content -> {
|
||||
key.set(counter.getAndIncrement());
|
||||
value.set(content);
|
||||
try {
|
||||
writer.append(key, value);
|
||||
} catch (final Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
.collect(this.api, this.report)
|
||||
.forEach(content -> {
|
||||
key.set(counter.getAndIncrement());
|
||||
value.set(content);
|
||||
try {
|
||||
writer.append(key, value);
|
||||
} catch (final Throwable e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (final Throwable e) {
|
||||
this.report.put(e.getClass().getName(), e.getMessage());
|
||||
throw new CollectorException(e);
|
||||
|
@ -114,34 +112,36 @@ public class CollectorWorker extends ReportingJob {
|
|||
private CollectorPlugin getCollectorPlugin() throws UnknownCollectorPluginException {
|
||||
|
||||
switch (CollectorPlugin.NAME.valueOf(this.api.getProtocol())) {
|
||||
case oai:
|
||||
return new OaiCollectorPlugin(this.clientParams);
|
||||
case rest_json2xml:
|
||||
return new RestCollectorPlugin(this.clientParams);
|
||||
case file:
|
||||
return new FileCollectorPlugin(this.fileSystem);
|
||||
case fileGzip:
|
||||
return new FileGZipCollectorPlugin(this.fileSystem);
|
||||
case baseDump:
|
||||
return new BaseCollectorPlugin(this.fileSystem);
|
||||
case gtr2Publications:
|
||||
return new Gtr2PublicationsCollectorPlugin(this.clientParams);
|
||||
case other:
|
||||
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
|
||||
case oai:
|
||||
return new OaiCollectorPlugin(this.clientParams);
|
||||
case rest_json2xml:
|
||||
return new RestCollectorPlugin(this.clientParams);
|
||||
case file:
|
||||
return new FileCollectorPlugin(this.fileSystem);
|
||||
case fileGzip:
|
||||
return new FileGZipCollectorPlugin(this.fileSystem);
|
||||
case baseDump:
|
||||
return new BaseCollectorPlugin(this.fileSystem);
|
||||
case gtr2Publications:
|
||||
return new Gtr2PublicationsCollectorPlugin(this.clientParams);
|
||||
case osfPreprints:
|
||||
return new OsfPreprintsCollectorPlugin(this.clientParams);
|
||||
case other:
|
||||
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
|
||||
.ofNullable(this.api.getParams().get("other_plugin_type"))
|
||||
.map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
|
||||
.orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
|
||||
|
||||
switch (plugin) {
|
||||
case mdstore_mongodb_dump:
|
||||
return new MongoDbDumpCollectorPlugin(this.fileSystem);
|
||||
case mdstore_mongodb:
|
||||
return new MDStoreCollectorPlugin();
|
||||
default:
|
||||
throw new UnknownCollectorPluginException("plugin is not managed: " + plugin);
|
||||
}
|
||||
switch (plugin) {
|
||||
case mdstore_mongodb_dump:
|
||||
return new MongoDbDumpCollectorPlugin(this.fileSystem);
|
||||
case mdstore_mongodb:
|
||||
return new MDStoreCollectorPlugin();
|
||||
default:
|
||||
throw new UnknownCollectorPluginException("protocol is not managed: " + this.api.getProtocol());
|
||||
throw new UnknownCollectorPluginException("plugin is not managed: " + plugin);
|
||||
}
|
||||
default:
|
||||
throw new UnknownCollectorPluginException("protocol is not managed: " + this.api.getProtocol());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ public interface CollectorPlugin {
|
|||
|
||||
enum NAME {
|
||||
|
||||
oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications;
|
||||
oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints;
|
||||
|
||||
public enum OTHER_NAME {
|
||||
mdstore_mongodb_dump, mdstore_mongodb
|
||||
|
|
|
@ -29,9 +29,12 @@ public class Gtr2PublicationsCollectorPlugin implements CollectorPlugin {
|
|||
final String endPage = api.getParams().get("endPage");
|
||||
final String fromDate = api.getParams().get("fromDate");
|
||||
|
||||
if ((fromDate != null) && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); }
|
||||
if ((fromDate != null) && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||
}
|
||||
|
||||
final Iterator<String> iterator = new Gtr2PublicationsIterator(baseUrl, fromDate, startPage, endPage, this.clientParams);
|
||||
final Iterator<String> iterator = new Gtr2PublicationsIterator(baseUrl, fromDate, startPage, endPage,
|
||||
this.clientParams);
|
||||
final Spliterator<String> spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED);
|
||||
|
||||
return StreamSupport.stream(spliterator, false);
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.gtr2;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -48,9 +49,10 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
|
|||
|
||||
private String nextElement;
|
||||
|
||||
public Gtr2PublicationsIterator(final String baseUrl, final String fromDate, final String startPage, final String endPage,
|
||||
final HttpClientParams clientParams)
|
||||
throws CollectorException {
|
||||
public Gtr2PublicationsIterator(final String baseUrl, final String fromDate, final String startPage,
|
||||
final String endPage,
|
||||
final HttpClientParams clientParams)
|
||||
throws CollectorException {
|
||||
|
||||
this.baseUrl = baseUrl;
|
||||
this.currPage = NumberUtils.toInt(startPage, 1);
|
||||
|
@ -120,7 +122,8 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
|
|||
return res;
|
||||
}
|
||||
|
||||
private void addLinkedEntities(final Element master, final String relType, final Element newRoot, final Function<Document, Element> mapper) {
|
||||
private void addLinkedEntities(final Element master, final String relType, final Element newRoot,
|
||||
final Function<Document, Element> mapper) {
|
||||
|
||||
for (final Object o : master.selectNodes(".//*[local-name()='link']")) {
|
||||
final String rel = ((Element) o).valueOf("@*[local-name()='rel']");
|
||||
|
@ -149,7 +152,7 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
|
|||
|
||||
private boolean filterIncremental(final Element e) {
|
||||
if (!this.incremental || isAfter(e.valueOf("@*[local-name() = 'created']"), this.fromDate)
|
||||
|| isAfter(e.valueOf("@*[local-name() = 'updated']"), this.fromDate)) {
|
||||
|| isAfter(e.valueOf("@*[local-name() = 'updated']"), this.fromDate)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -165,7 +168,9 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
|
|||
private Element asProjectElement(final Document doc) {
|
||||
final Element newOrg = DocumentHelper.createElement("project");
|
||||
newOrg.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']"));
|
||||
newOrg.addElement("code").setText(doc.valueOf("//*[local-name()='identifier' and @*[local-name()='type'] = 'RCUK']"));
|
||||
newOrg
|
||||
.addElement("code")
|
||||
.setText(doc.valueOf("//*[local-name()='identifier' and @*[local-name()='type'] = 'RCUK']"));
|
||||
newOrg.addElement("title").setText(doc.valueOf("//*[local-name()='title']"));
|
||||
return newOrg;
|
||||
}
|
||||
|
@ -188,7 +193,9 @@ public class Gtr2PublicationsIterator implements Iterator<String> {
|
|||
return DocumentHelper.parseText(new String(bytes));
|
||||
} catch (final Throwable e) {
|
||||
log.error("Error dowloading url: " + cleanUrl + ", attempt = " + attempt, e);
|
||||
if (attempt >= MAX_ATTEMPTS) { throw new RuntimeException("Error dowloading url: " + cleanUrl, e); }
|
||||
if (attempt >= MAX_ATTEMPTS) {
|
||||
throw new RuntimeException("Error dowloading url: " + cleanUrl, e);
|
||||
}
|
||||
try {
|
||||
Thread.sleep(60000); // I wait for a minute
|
||||
} catch (final InterruptedException e1) {
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.osf;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.Spliterator;
|
||||
import java.util.Spliterators;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
public class OsfPreprintsCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
public static final int PAGE_SIZE_VALUE_DEFAULT = 100;
|
||||
|
||||
private final HttpClientParams clientParams;
|
||||
|
||||
public OsfPreprintsCollectorPlugin(final HttpClientParams clientParams) {
|
||||
this.clientParams = clientParams;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException {
|
||||
final String baseUrl = api.getBaseUrl();
|
||||
|
||||
final Gson gson = new Gson();
|
||||
|
||||
final int pageSize = Optional
|
||||
.ofNullable(api.getParams().get("pageSize"))
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT))
|
||||
.orElse(PAGE_SIZE_VALUE_DEFAULT);
|
||||
|
||||
if (StringUtils.isBlank(baseUrl)) { throw new CollectorException("Param 'baseUrl' is null or empty"); }
|
||||
|
||||
final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams());
|
||||
|
||||
return StreamSupport
|
||||
.stream(Spliterators.spliteratorUnknownSize(it, Spliterator.ORDERED), false);
|
||||
}
|
||||
|
||||
public HttpClientParams getClientParams() {
|
||||
return this.clientParams;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,232 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.osf;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Iterator;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpression;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||
|
||||
public class OsfPreprintsIterator implements Iterator<String> {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(OsfPreprintsIterator.class);
|
||||
public static final String UTF_8 = "UTF-8";
|
||||
private static final int MAX_ATTEMPTS = 5;
|
||||
|
||||
private final HttpClientParams clientParams;
|
||||
|
||||
private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
||||
private static final String EMPTY_XML = XML_HEADER + "<" + JsonUtils.XML_WRAP_TAG + "></" + JsonUtils.XML_WRAP_TAG
|
||||
+ ">";
|
||||
|
||||
private final String baseUrl;
|
||||
private final int pageSize;
|
||||
|
||||
private int resumptionInt = 0; // integer resumption token (first record to harvest)
|
||||
private int resultTotal = -1;
|
||||
private String resumptionStr = Integer.toString(this.resumptionInt); // string resumption token (first record to
|
||||
// harvest
|
||||
// or token scanned from results)
|
||||
private InputStream resultStream;
|
||||
private Transformer transformer;
|
||||
private XPath xpath;
|
||||
private String query;
|
||||
private XPathExpression xprResultTotalPath;
|
||||
private XPathExpression xprResumptionPath;
|
||||
private XPathExpression xprEntity;
|
||||
private final Queue<String> recordQueue = new PriorityBlockingQueue<>();
|
||||
|
||||
public OsfPreprintsIterator(
|
||||
final String baseUrl,
|
||||
final int pageSize,
|
||||
final HttpClientParams clientParams) {
|
||||
|
||||
this.clientParams = clientParams;
|
||||
this.baseUrl = baseUrl;
|
||||
this.pageSize = pageSize;
|
||||
|
||||
try {
|
||||
final TransformerFactory factory = TransformerFactory.newInstance();
|
||||
this.transformer = factory.newTransformer();
|
||||
this.transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
||||
this.transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
|
||||
this.xpath = XPathFactory.newInstance().newXPath();
|
||||
this.xprResultTotalPath = this.xpath.compile("/*/*[local-name()='links']/*[local-name()='meta']/*[local-name()='total']");
|
||||
this.xprResumptionPath = this.xpath.compile("substring-before(substring-after(/*/*[local-name()='links']/*[local-name()='next'], 'page='), '&')");
|
||||
this.xprEntity = this.xpath.compile("/*/*[local-name()='data']");
|
||||
} catch (final Exception e) {
|
||||
throw new IllegalStateException("xml transformation init failed: " + e.getMessage());
|
||||
}
|
||||
|
||||
initQueue();
|
||||
}
|
||||
|
||||
private void initQueue() {
|
||||
this.query = this.baseUrl + "?filter:is_published:d=true&format=json&page[size]=" + this.pageSize;
|
||||
log.info("REST calls starting with {}", this.query);
|
||||
}
|
||||
|
||||
private void disconnect() {
|
||||
// TODO close inputstream
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#hasNext()
|
||||
*/
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
synchronized (this.recordQueue) {
|
||||
while (this.recordQueue.isEmpty() && !this.query.isEmpty()) {
|
||||
try {
|
||||
this.query = downloadPage(this.query, 0);
|
||||
} catch (final CollectorException e) {
|
||||
log.debug("CollectorPlugin.next()-Exception: {}", e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.recordQueue.isEmpty()) { return true; }
|
||||
|
||||
disconnect();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see java.util.Iterator#next()
|
||||
*/
|
||||
@Override
|
||||
public String next() {
|
||||
synchronized (this.recordQueue) {
|
||||
return this.recordQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* download page and return nextQuery (with number of attempt)
|
||||
*/
|
||||
private String downloadPage(final String query, final int attempt) throws CollectorException {
|
||||
|
||||
if (attempt > MAX_ATTEMPTS) { throw new CollectorException("Max Number of attempts reached, query:" + query); }
|
||||
|
||||
if (attempt > 0) {
|
||||
final int delay = (attempt * 5000);
|
||||
log.debug("Attempt {} with delay {}", attempt, delay);
|
||||
try {
|
||||
Thread.sleep(delay);
|
||||
} catch (final InterruptedException e) {
|
||||
new CollectorException(e);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
String resultJson;
|
||||
String resultXml = XML_HEADER;
|
||||
String nextQuery = "";
|
||||
Node resultNode = null;
|
||||
NodeList nodeList = null;
|
||||
|
||||
try {
|
||||
log.info("requesting URL [{}]", query);
|
||||
|
||||
final URL qUrl = new URL(query);
|
||||
|
||||
final HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
||||
conn.setRequestMethod("GET");
|
||||
this.resultStream = conn.getInputStream();
|
||||
|
||||
resultJson = IOUtils.toString(this.resultStream, StandardCharsets.UTF_8);
|
||||
resultXml = JsonUtils.convertToXML(resultJson);
|
||||
this.resultStream = IOUtils.toInputStream(resultXml, UTF_8);
|
||||
|
||||
if (!isEmptyXml(resultXml)) {
|
||||
resultNode = (Node) this.xpath
|
||||
.evaluate("/", new InputSource(this.resultStream), XPathConstants.NODE);
|
||||
nodeList = (NodeList) this.xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
||||
log.debug("nodeList.length: {}", nodeList.getLength());
|
||||
for (int i = 0; i < nodeList.getLength(); i++) {
|
||||
final StringWriter sw = new StringWriter();
|
||||
this.transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
||||
final String toEnqueue = sw.toString();
|
||||
if ((toEnqueue == null) || StringUtils.isBlank(toEnqueue) || isEmptyXml(toEnqueue)) {
|
||||
log
|
||||
.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
|
||||
} else {
|
||||
this.recordQueue.add(sw.toString());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.warn("resultXml is equal with emptyXml");
|
||||
}
|
||||
|
||||
this.resumptionInt += this.pageSize;
|
||||
|
||||
this.resumptionStr = this.xprResumptionPath.evaluate(resultNode);
|
||||
|
||||
} catch (final Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
throw new IllegalStateException("collection failed: " + e.getMessage());
|
||||
}
|
||||
|
||||
try {
|
||||
if (this.resultTotal == -1) {
|
||||
this.resultTotal = Integer.parseInt(this.xprResultTotalPath.evaluate(resultNode));
|
||||
log.info("resultTotal was -1 is now: " + this.resultTotal);
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
throw new IllegalStateException("downloadPage resultTotal couldn't parse: " + e.getMessage());
|
||||
}
|
||||
log.debug("resultTotal: " + this.resultTotal);
|
||||
log.debug("resInt: " + this.resumptionInt);
|
||||
if (this.resumptionInt <= this.resultTotal) {
|
||||
nextQuery = this.baseUrl + "?filter:is_published:d=true&format=json&page[size]=" + this.pageSize + "&page="
|
||||
+ this.resumptionStr;
|
||||
} else {
|
||||
nextQuery = "";
|
||||
// if (resumptionType.toLowerCase().equals("deep-cursor")) { resumptionInt -= 1; } // correct the
|
||||
// resumptionInt and prevent a NullPointer Exception at mdStore
|
||||
}
|
||||
log.debug("nextQueryUrl: " + nextQuery);
|
||||
return nextQuery;
|
||||
} catch (final Throwable e) {
|
||||
log.warn(e.getMessage(), e);
|
||||
return downloadPage(query, attempt + 1);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean isEmptyXml(final String s) {
|
||||
return EMPTY_XML.equalsIgnoreCase(s);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.plugin.gtr2;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
@ -55,7 +56,8 @@ class Gtr2PublicationsIteratorTest {
|
|||
@Disabled
|
||||
public void testIncrementalHarvestingNoRecords() throws Exception {
|
||||
System.out.println("incremental Harvesting");
|
||||
final Iterator<String> iterator = new Gtr2PublicationsIterator(baseURL, "2050-12-12T", "11", "13", clientParams);
|
||||
final Iterator<String> iterator = new Gtr2PublicationsIterator(baseURL, "2050-12-12T", "11", "13",
|
||||
clientParams);
|
||||
final int count = iterateAndCount(iterator);
|
||||
assertEquals(0, count);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue