Add note on harvesting range, respect IETF draft rate limit #140
|
@ -49,4 +49,9 @@ public class Constants {
|
|||
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
||||
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
||||
|
||||
// Rate-limit headers from an IETF draft, used by repositories such as Zenodo; not included in the Apache HTTP Java packages
|
||||
// see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
|
||||
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
|
||||
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
|
||||
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|||
/**
|
||||
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
||||
*
|
||||
* @author jochen, michele, andrea, alessia, claudio
|
||||
* @author jochen, michele, andrea, alessia, claudio, andreas
|
||||
*/
|
||||
public class HttpConnector2 {
|
||||
|
||||
|
@ -112,6 +112,17 @@ public class HttpConnector2 {
|
|||
}
|
||||
|
||||
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||
String rateLimit = urlConn.getHeaderField(eu.dnetlib.dhp.common.Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
|
||||
String rateRemaining = urlConn.getHeaderField(eu.dnetlib.dhp.common.Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
|
||||
|
||||
if((rateLimit != null) && (rateRemaining != null) && ( Integer.parseInt(rateRemaining) < 2)) {
|
||||
if (retryAfter > 0) {
|
||||
backoffAndSleep(retryAfter);
|
||||
} else {
|
||||
backoffAndSleep(1000);
|
||||
}
|
||||
}
|
||||
|
||||
if (is2xx(urlConn.getResponseCode())) {
|
||||
input = urlConn.getInputStream();
|
||||
responseType = urlConn.getContentType();
|
||||
|
|
|
@ -62,12 +62,14 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
|||
throw new CollectorException("Param 'mdFormat' is null or empty");
|
||||
}
|
||||
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")
|
||||
&& !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate);
|
||||
}
|
||||
|
||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")
|
||||
&& !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) {
|
||||
throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate);
|
||||
}
|
||||
|
||||
final Iterator<Iterator<String>> iters = sets
|
||||
|
|
|
@ -107,10 +107,12 @@ public class OaiIterator implements Iterator<String> {
|
|||
if (set != null && !set.isEmpty()) {
|
||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
||||
}
|
||||
if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
||||
if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}")
|
||||
|| fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
||||
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
||||
}
|
||||
if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
||||
if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}")
|
||||
|| untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) {
|
||||
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
||||
}
|
||||
log.info("Start harvesting using url: " + url);
|
||||
|
|
Loading…
Reference in New Issue