diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 108edad47..f8d8e6684 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -49,4 +49,9 @@ public class Constants { public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + // IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages + // see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset"; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java index a84b26955..b745584ee 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java @@ -20,7 +20,7 @@ import eu.dnetlib.dhp.aggregation.common.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * - * @author jochen, michele, andrea, alessia, claudio + * @author jochen, michele, andrea, alessia, claudio, andreas */ public class HttpConnector2 { @@ -112,6 +112,17 @@ public class HttpConnector2 { } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + String rateLimit = urlConn.getHeaderField(eu.dnetlib.dhp.common.Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); + String rateRemaining = urlConn.getHeaderField(eu.dnetlib.dhp.common.Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + + if((rateLimit != null) && (rateRemaining != null) && ( Integer.parseInt(rateRemaining) < 2)) { + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } else { + backoffAndSleep(1000); + } + } + if (is2xx(urlConn.getResponseCode())) { input = urlConn.getInputStream(); responseType = urlConn.getContentType(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 67fd352a3..5434bed1b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -62,12 +62,14 @@ public class OaiCollectorPlugin implements CollectorPlugin { throw new CollectorException("Param 'mdFormat' is null or empty"); } - if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}") + && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate); } - if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); + if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}") + && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z")) { + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate); } final Iterator> iters = sets diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index c044e02db..392311765 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -107,10 +107,12 @@ public class OaiIterator implements Iterator { if (set != null && !set.isEmpty()) { url += "&set=" + URLEncoder.encode(set, "UTF-8"); } - if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { + if (fromDate != null && (fromDate.matches("\\d{4}-\\d{2}-\\d{2}") + || fromDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); } - if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { + if (untilDate != null && (untilDate.matches("\\d{4}-\\d{2}-\\d{2}") + || untilDate.matches("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}Z"))) { url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); } log.info("Start harvesting using url: " + url);