diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java index 269a89f711..cad4999628 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java @@ -6,7 +6,7 @@ import java.util.Queue; import java.util.concurrent.PriorityBlockingQueue; import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.Header; @@ -27,25 +27,25 @@ public class ResearchFiIterator implements Iterator { private final String baseUrl; private final String authToken; - private int currPage; - private int nPages; + private String nextUrl; + private int nCalls = 0; private final Queue queue = new PriorityBlockingQueue<>(); public ResearchFiIterator(final String baseUrl, final String authToken) { this.baseUrl = baseUrl; this.authToken = authToken; - this.currPage = 0; - this.nPages = 0; + this.nextUrl = null; } private void verifyStarted() { - if (this.currPage == 0) { - try { - nextCall(); - } catch (final CollectorException e) { - throw new IllegalStateException(e); + + try { + if (this.nCalls == 0) { + this.nextUrl = invokeUrl(this.baseUrl); } + } catch (final CollectorException e) { + throw new IllegalStateException(e); } } @@ -62,9 +62,9 @@ public class ResearchFiIterator implements Iterator { synchronized (this.queue) { verifyStarted(); final String res = this.queue.poll(); - while (this.queue.isEmpty() && (this.currPage < this.nPages)) { + while (this.queue.isEmpty() && StringUtils.isNotBlank(this.nextUrl)) { try { - nextCall(); + this.nextUrl = invokeUrl(this.nextUrl); } catch (final CollectorException e) { throw new IllegalStateException(e); } @@ -73,18 +73,11 @@ public class ResearchFiIterator implements Iterator { } } - private void nextCall() throws CollectorException { + private String invokeUrl(final String url) throws CollectorException { - this.currPage += 1; + this.nCalls += 1; + String next = null; - final String url; - if (!this.baseUrl.contains("?")) { - url = String.format("%s?PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE); - } else if (!this.baseUrl.contains("PageSize=")) { - url = String.format("%s&PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE); - } else { - url = String.format("%s&PageNumber=%d", this.baseUrl, this.currPage); - } log.info("Calling url: " + url); try (final CloseableHttpClient client = HttpClients.createDefault()) { @@ -94,11 +87,15 @@ public class ResearchFiIterator implements Iterator { try (final CloseableHttpResponse response = client.execute(req)) { for (final Header header : response.getAllHeaders()) { log.debug("HEADER: " + header.getName() + " = " + header.getValue()); - if ("x-page-count".equals(header.getName())) { - final int totalPages = NumberUtils.toInt(header.getValue()); - if (this.nPages != totalPages) { - this.nPages = NumberUtils.toInt(header.getValue()); - log.info("Total pages: " + totalPages); + if ("link".equals(header.getName())) { + final String s = StringUtils.substringBetween(header.getValue(), "<", ">"); + final String token = StringUtils + .substringBefore(StringUtils.substringAfter(s, "NextPageToken="), "&"); + + if (this.baseUrl.contains("?")) { + next = this.baseUrl + "&NextPageToken=" + token; + } else { + next = this.baseUrl + "?NextPageToken=" + token; } } } @@ -108,6 +105,9 @@ public class ResearchFiIterator implements Iterator { jsonArray.forEach(obj -> this.queue.add(JsonUtils.convertToXML(obj.toString()))); } + + return next; + } catch (final Throwable e) { log.warn("Error calling url: " + url, e); throw new CollectorException("Error calling url: " + url, e);