From 663b1556d7adc0b9f8a94f75d99c299e216b08a1 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 15 Sep 2021 16:40:25 +0200 Subject: [PATCH] manually integrating PR#140 https://code-repo.d4science.org/D-Net/dnet-hadoop/pulls/140 --- .../main/java/eu/dnetlib/dhp/common/Constants.java | 6 ++++++ .../dhp/common/collection/HttpConnector2.java | 14 +++++++++++++- .../collection/plugin/oai/OaiCollectorPlugin.java | 4 ++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java index 8fab94e92..a62a0ac79 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/Constants.java @@ -52,4 +52,10 @@ public class Constants { public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; + // Rate-limit headers described in an IETF draft and used by repositories such as Zenodo; not defined in the Apache HTTP Java packages + // see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining"; + public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset"; + } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 724f5f0e1..dd46ab1f4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import eu.dnetlib.dhp.common.Constants; import 
eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * - * @author jochen, michele, andrea, alessia, claudio + * @author jochen, michele, andrea, alessia, claudio, andreas */ public class HttpConnector2 { @@ -112,6 +113,17 @@ public class HttpConnector2 { } int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); + String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); + String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING); + + if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) { + if (retryAfter > 0) { + backoffAndSleep(retryAfter); + } else { + backoffAndSleep(1000); + } + } + if (is2xx(urlConn.getResponseCode())) { input = urlConn.getInputStream(); responseType = urlConn.getContentType(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java index 878e286e0..2d04b2574 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java @@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin { } if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate); } if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { - throw new CollectorException("Invalid 
date (YYYY-MM-DD): " + untilDate); + throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate); } final Iterator> iters = sets