From 9e8fc6aa88d592fc3cb354bc36894b95679b5092 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 26 Jan 2024 09:17:20 +0100 Subject: [PATCH] [collection] increased logging from the oai-pmh metadata collection process --- .../dhp/common/collection/HttpConnector2.java | 63 ++++++++++++++----- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 08cc3ec59..342d73cdc 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -97,8 +97,6 @@ public class HttpConnector2 { throw new CollectorException(msg); } - log.info("Request attempt {} [{}]", retryNumber, requestUrl); - InputStream input = null; long start = System.currentTimeMillis(); @@ -106,6 +104,9 @@ public class HttpConnector2 { if (getClientParams().getRequestDelay() > 0) { backoffAndSleep(getClientParams().getRequestDelay()); } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); urlConn.setInstanceFollowRedirects(false); urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); @@ -135,9 +136,7 @@ public class HttpConnector2 { } if (is2xx(urlConn.getResponseCode())) { - input = urlConn.getInputStream(); - responseType = urlConn.getContentType(); - return input; + return getInputStream(urlConn, start); } if (is3xx(urlConn.getResponseCode())) { // REDIRECTS @@ -147,6 +146,7 @@ public class HttpConnector2 { .put( REPORT_PREFIX + urlConn.getResponseCode(), String.format("Moved to: %s", newUrl)); + logRequestTime(start); urlConn.disconnect(); if (retryAfter > 0) { backoffAndSleep(retryAfter); @@ -162,19 +162,39 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - "{} - waiting and repeating request after suggested retry-after {} sec.", - requestUrl, retryAfter); + "waiting and repeating request after suggested retry-after {} sec for URL {}", + retryAfter, requestUrl); backoffAndSleep(retryAfter * 1000); } else { log .warn( - "{} - waiting and repeating request after default delay of {} sec.", - requestUrl, getClientParams().getRetryDelay()); + "waiting and repeating request after default delay of {} sec for URL {}", + getClientParams().getRetryDelay(), requestUrl); backoffAndSleep(retryNumber * getClientParams().getRetryDelay()); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + + logRequestTime(start); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + case 422: // UNPROCESSABLE ENTITY + report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + log.warn("waiting and repeating request after 10 sec for URL {}", requestUrl); + backoffAndSleep(10000); + urlConn.disconnect(); + logRequestTime(start); + try { + return getInputStream(urlConn, start); + } catch (IOException e) { + log + .error( + "server returned 422 and got IOException accessing the response body from URL {}", + requestUrl); + log.error("IOException:", e); + return attemptDownload(requestUrl, retryNumber + 1, report); + } default: log.error("gor error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL()); log.error("response message: {}", urlConn.getResponseMessage()); @@ -184,6 +204,8 @@ public class HttpConnector2 { String .format( "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + logRequestTime(start); + urlConn.disconnect(); throw new CollectorException(urlConn.getResponseCode() + " error " + report); } } @@ -201,16 +223,25 @@ public class HttpConnector2 { report.put(e.getClass().getName(), e.getMessage()); backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000); return attemptDownload(requestUrl, retryNumber + 1, report); - } finally { - log - .info( - "request time elapsed: {}sec", - TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); } } + private InputStream getInputStream(HttpURLConnection urlConn, long start) throws IOException { + InputStream input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + logRequestTime(start); + return input; + } + + private static void logRequestTime(long start) { + log + .info( + "request time elapsed: {}sec", + TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); + } + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.info("StatusCode: {}", urlConn.getResponseMessage()); + log.info("Response: {} - {}", urlConn.getResponseCode(), urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if (e.getKey() != null) { @@ -235,7 +266,7 @@ public class HttpConnector2 { for (String key : headerMap.keySet()) { if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { - return Integer.parseInt(headerMap.get(key).get(0)) + 10; + return Integer.parseInt(headerMap.get(key).get(0)); } } return -1;