forked from D-Net/dnet-hadoop
support of the new apis
This commit is contained in:
parent
cc6bbbb804
commit
65902a87e3
|
@ -6,7 +6,7 @@ import java.util.Queue;
|
|||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.Header;
|
||||
|
@ -27,27 +27,27 @@ public class ResearchFiIterator implements Iterator<String> {
|
|||
|
||||
private final String baseUrl;
|
||||
private final String authToken;
|
||||
private int currPage;
|
||||
private int nPages;
|
||||
private String nextUrl;
|
||||
private int nCalls = 0;
|
||||
|
||||
private final Queue<String> queue = new PriorityBlockingQueue<>();
|
||||
|
||||
public ResearchFiIterator(final String baseUrl, final String authToken) {
|
||||
this.baseUrl = baseUrl;
|
||||
this.authToken = authToken;
|
||||
this.currPage = 0;
|
||||
this.nPages = 0;
|
||||
this.nextUrl = null;
|
||||
}
|
||||
|
||||
private void verifyStarted() {
|
||||
if (this.currPage == 0) {
|
||||
|
||||
try {
|
||||
nextCall();
|
||||
if (this.nCalls == 0) {
|
||||
this.nextUrl = invokeUrl(this.baseUrl);
|
||||
}
|
||||
} catch (final CollectorException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
|
@ -62,9 +62,9 @@ public class ResearchFiIterator implements Iterator<String> {
|
|||
synchronized (this.queue) {
|
||||
verifyStarted();
|
||||
final String res = this.queue.poll();
|
||||
while (this.queue.isEmpty() && (this.currPage < this.nPages)) {
|
||||
while (this.queue.isEmpty() && StringUtils.isNotBlank(this.nextUrl)) {
|
||||
try {
|
||||
nextCall();
|
||||
this.nextUrl = invokeUrl(this.nextUrl);
|
||||
} catch (final CollectorException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
|
@ -73,18 +73,11 @@ public class ResearchFiIterator implements Iterator<String> {
|
|||
}
|
||||
}
|
||||
|
||||
private void nextCall() throws CollectorException {
|
||||
private String invokeUrl(final String url) throws CollectorException {
|
||||
|
||||
this.currPage += 1;
|
||||
this.nCalls += 1;
|
||||
String next = null;
|
||||
|
||||
final String url;
|
||||
if (!this.baseUrl.contains("?")) {
|
||||
url = String.format("%s?PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE);
|
||||
} else if (!this.baseUrl.contains("PageSize=")) {
|
||||
url = String.format("%s&PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE);
|
||||
} else {
|
||||
url = String.format("%s&PageNumber=%d", this.baseUrl, this.currPage);
|
||||
}
|
||||
log.info("Calling url: " + url);
|
||||
|
||||
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||
|
@ -94,11 +87,15 @@ public class ResearchFiIterator implements Iterator<String> {
|
|||
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||
for (final Header header : response.getAllHeaders()) {
|
||||
log.debug("HEADER: " + header.getName() + " = " + header.getValue());
|
||||
if ("x-page-count".equals(header.getName())) {
|
||||
final int totalPages = NumberUtils.toInt(header.getValue());
|
||||
if (this.nPages != totalPages) {
|
||||
this.nPages = NumberUtils.toInt(header.getValue());
|
||||
log.info("Total pages: " + totalPages);
|
||||
if ("link".equals(header.getName())) {
|
||||
final String s = StringUtils.substringBetween(header.getValue(), "<", ">");
|
||||
final String token = StringUtils
|
||||
.substringBefore(StringUtils.substringAfter(s, "NextPageToken="), "&");
|
||||
|
||||
if (this.baseUrl.contains("?")) {
|
||||
next = this.baseUrl + "&NextPageToken=" + token;
|
||||
} else {
|
||||
next = this.baseUrl + "?NextPageToken=" + token;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -108,6 +105,9 @@ public class ResearchFiIterator implements Iterator<String> {
|
|||
|
||||
jsonArray.forEach(obj -> this.queue.add(JsonUtils.convertToXML(obj.toString())));
|
||||
}
|
||||
|
||||
return next;
|
||||
|
||||
} catch (final Throwable e) {
|
||||
log.warn("Error calling url: " + url, e);
|
||||
throw new CollectorException("Error calling url: " + url, e);
|
||||
|
|
Loading…
Reference in New Issue