added requestDelay to HttpConnector2 configuration; Aggregation workflow constants moved to dhp-common

This commit is contained in:
Claudio Atzori 2021-02-08 12:19:38 +01:00
parent 40df0f987d
commit 50add4c61b
18 changed files with 106 additions and 49 deletions

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collector.worker.model; package eu.dnetlib.dhp.collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;

View File

@ -27,4 +27,24 @@ public class Constants {
coarCodeLabelMap.put("c_f1cf", "EMBARGO"); coarCodeLabelMap.put("c_f1cf", "EMBARGO");
} }
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
public static final String REPORT_FILE_NAME = "/report";
public static final String MDSTORE_DATA_PATH = "/store";
public static final String MDSTORE_SIZE_PATH = "/size";
public static final String COLLECTION_MODE = "collectionMode";
public static final String METADATA_ENCODING = "metadataEncoding";
public static final String OOZIE_WF_PATH = "oozieWfPath";
public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
public static final String REQUEST_DELAY = "requestDelay";
public static final String RETRY_DELAY = "retryDelay";
public static final String CONNECT_TIMEOUT = "connectTimeOut";
public static final String READ_TIMEOUT = "readTimeOut";
public static final String CONTENT_TOTALITEMS = "TotalItems";
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
} }

View File

@ -1,15 +0,0 @@
package eu.dnetlib.dhp.aggregation.common;
public class AggregationConstants {
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
public static final String REPORT_FILE_NAME = "/report";
public static final String MDSTORE_DATA_PATH = "/store";
public static final String MDSTORE_SIZE_PATH = "/size";
public static final String CONTENT_TOTALITEMS = "TotalItems";
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
}

View File

@ -1,8 +1,7 @@
package eu.dnetlib.dhp.aggregation.mdstore; package eu.dnetlib.dhp.aggregation.mdstore;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.application.ApplicationUtils.*;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.net.URI; import java.net.URI;

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.collection;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;

View File

@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.CollectorException; import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.CollectorPluginReport; import eu.dnetlib.dhp.collection.worker.CollectorPluginReport;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
public interface CollectorPlugin { public interface CollectorPlugin {

View File

@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.CollectorException; import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.CollectorPluginReport; import eu.dnetlib.dhp.collection.worker.CollectorPluginReport;
import eu.dnetlib.dhp.collection.worker.HttpClientParams; import eu.dnetlib.dhp.collection.worker.HttpClientParams;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
public class OaiCollectorPlugin implements CollectorPlugin { public class OaiCollectorPlugin implements CollectorPlugin {

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection.worker; package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.SEQUENCE_FILE_NAME; import static eu.dnetlib.dhp.common.Constants.SEQUENCE_FILE_NAME;
import java.io.IOException; import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -15,8 +15,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
public class CollectorWorker { public class CollectorWorker {

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection.worker; package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException; import java.io.IOException;
@ -17,7 +17,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
/** /**
@ -55,7 +55,7 @@ public class CollectorWorkerApplication {
final String mdStoreVersion = argumentParser.get("mdStoreVersion"); final String mdStoreVersion = argumentParser.get("mdStoreVersion");
log.info("mdStoreVersion is {}", mdStoreVersion); log.info("mdStoreVersion is {}", mdStoreVersion);
final String dnetMessageManagerURL = argumentParser.get("dnetMessageManagerURL"); final String dnetMessageManagerURL = argumentParser.get(DNET_MESSAGE_MGR_URL);
log.info("dnetMessageManagerURL is {}", dnetMessageManagerURL); log.info("dnetMessageManagerURL is {}", dnetMessageManagerURL);
final String workflowId = argumentParser.get("workflowId"); final String workflowId = argumentParser.get("workflowId");
@ -87,15 +87,23 @@ public class CollectorWorkerApplication {
clientParams clientParams
.setMaxNumberOfRetry( .setMaxNumberOfRetry(
Optional Optional
.ofNullable(argumentParser.get("maxNumberOfRetry")) .ofNullable(argumentParser.get(MAX_NUMBER_OF_RETRY))
.map(Integer::parseInt) .map(Integer::parseInt)
.orElse(HttpClientParams._maxNumberOfRetry)); .orElse(HttpClientParams._maxNumberOfRetry));
log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry()); log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry());
clientParams
.setRequestDelay(
Optional
.ofNullable(argumentParser.get(REQUEST_DELAY))
.map(Integer::parseInt)
.orElse(HttpClientParams._requestDelay));
log.info("requestDelay is {}", clientParams.getRequestDelay());
clientParams clientParams
.setRetryDelay( .setRetryDelay(
Optional Optional
.ofNullable(argumentParser.get("retryDelay")) .ofNullable(argumentParser.get(RETRY_DELAY))
.map(Integer::parseInt) .map(Integer::parseInt)
.orElse(HttpClientParams._retryDelay)); .orElse(HttpClientParams._retryDelay));
log.info("retryDelay is {}", clientParams.getRetryDelay()); log.info("retryDelay is {}", clientParams.getRetryDelay());
@ -103,7 +111,7 @@ public class CollectorWorkerApplication {
clientParams clientParams
.setConnectTimeOut( .setConnectTimeOut(
Optional Optional
.ofNullable(argumentParser.get("connectTimeOut")) .ofNullable(argumentParser.get(CONNECT_TIMEOUT))
.map(Integer::parseInt) .map(Integer::parseInt)
.orElse(HttpClientParams._connectTimeOut)); .orElse(HttpClientParams._connectTimeOut));
log.info("connectTimeOut is {}", clientParams.getConnectTimeOut()); log.info("connectTimeOut is {}", clientParams.getConnectTimeOut());
@ -111,7 +119,7 @@ public class CollectorWorkerApplication {
clientParams clientParams
.setReadTimeOut( .setReadTimeOut(
Optional Optional
.ofNullable(argumentParser.get("readTimeOut")) .ofNullable(argumentParser.get(READ_TIMEOUT))
.map(Integer::parseInt) .map(Integer::parseInt)
.orElse(HttpClientParams._readTimeOut)); .orElse(HttpClientParams._readTimeOut));
log.info("readTimeOut is {}", clientParams.getReadTimeOut()); log.info("readTimeOut is {}", clientParams.getReadTimeOut());

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection.worker; package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.REPORT_FILE_NAME; import static eu.dnetlib.dhp.common.Constants.REPORT_FILE_NAME;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException; import java.io.IOException;

View File

@ -6,22 +6,46 @@ package eu.dnetlib.dhp.collection.worker;
*/ */
public class HttpClientParams { public class HttpClientParams {
// Defaults
public static int _maxNumberOfRetry = 3; public static int _maxNumberOfRetry = 3;
public static int _requestDelay = 0; // milliseconds
public static int _retryDelay = 10; // seconds public static int _retryDelay = 10; // seconds
public static int _connectTimeOut = 10; // seconds public static int _connectTimeOut = 10; // seconds
public static int _readTimeOut = 30; // seconds public static int _readTimeOut = 30; // seconds
/**
* Maximum number of allowed retries before failing
*/
private int maxNumberOfRetry; private int maxNumberOfRetry;
/**
* Delay between requests (Milliseconds)
*/
private int requestDelay;
/**
* Time to wait after a failure before retrying (Seconds)
*/
private int retryDelay; private int retryDelay;
/**
* Connect timeout (Seconds)
*/
private int connectTimeOut; private int connectTimeOut;
/**
* Read timeout (Seconds)
*/
private int readTimeOut; private int readTimeOut;
public HttpClientParams() { public HttpClientParams() {
this(_maxNumberOfRetry, _retryDelay, _connectTimeOut, _readTimeOut); this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
} }
public HttpClientParams(int maxNumberOfRetry, int retryDelay, int connectTimeOut, int readTimeOut) { public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
int readTimeOut) {
this.maxNumberOfRetry = maxNumberOfRetry; this.maxNumberOfRetry = maxNumberOfRetry;
this.requestDelay = requestDelay;
this.retryDelay = retryDelay; this.retryDelay = retryDelay;
this.connectTimeOut = connectTimeOut; this.connectTimeOut = connectTimeOut;
this.readTimeOut = readTimeOut; this.readTimeOut = readTimeOut;
@ -35,6 +59,14 @@ public class HttpClientParams {
this.maxNumberOfRetry = maxNumberOfRetry; this.maxNumberOfRetry = maxNumberOfRetry;
} }
public int getRequestDelay() {
return requestDelay;
}
public void setRequestDelay(int requestDelay) {
this.requestDelay = requestDelay;
}
public int getRetryDelay() { public int getRetryDelay() {
return retryDelay; return retryDelay;
} }

View File

@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
/** /**
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
* *
* @author jochen, michele, andrea, alessia * @author jochen, michele, andrea, alessia, claudio
*/ */
public class HttpConnector2 { public class HttpConnector2 {
@ -83,14 +83,22 @@ public class HttpConnector2 {
final CollectorPluginReport report) throws CollectorException, IOException { final CollectorPluginReport report) throws CollectorException, IOException {
if (retryNumber > getClientParams().getMaxNumberOfRetry()) { if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
throw new CollectorException("Max number of retries exceeded. Cause: \n " + report); final String msg = String
.format(
"Max number of retries (%s/%s) exceeded, failing.",
retryNumber, getClientParams().getMaxNumberOfRetry());
log.error(msg);
throw new CollectorException(msg);
} }
log.info("Downloading attempt {} [{}]", retryNumber, requestUrl); log.info("Request attempt {} [{}]", retryNumber, requestUrl);
InputStream input = null; InputStream input = null;
try { try {
if (getClientParams().getRequestDelay() > 0) {
backoffAndSleep(getClientParams().getRequestDelay());
}
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setInstanceFollowRedirects(false); urlConn.setInstanceFollowRedirects(false);
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
@ -190,10 +198,10 @@ public class HttpConnector2 {
} }
} }
private void backoffAndSleep(int sleepTime) throws CollectorException { private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
log.info("I'm going to sleep for {}ms", sleepTime); log.info("I'm going to sleep for {}ms", sleepTimeMs);
try { try {
Thread.sleep(sleepTime); Thread.sleep(sleepTimeMs);
} catch (InterruptedException e) { } catch (InterruptedException e) {
log.error(e.getMessage(), e); log.error(e.getMessage(), e);
throw new CollectorException(e); throw new CollectorException(e);

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.transformation; package eu.dnetlib.dhp.transformation;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;

View File

@ -30,25 +30,31 @@
"paramRequired": true "paramRequired": true
}, },
{ {
"paramName": "mr", "paramName": "mnr",
"paramLongName": "maxNumberOfRetry", "paramLongName": "maxNumberOfRetry",
"paramDescription": "the maximum number of admitted connection retries", "paramDescription": "the maximum number of admitted connection retries",
"paramRequired": false "paramRequired": false
}, },
{ {
"paramName": "rd", "paramName": "rqd",
"paramLongName": "requestDelay",
"paramDescription": "the delay (ms) between requests",
"paramRequired": false
},
{
"paramName": "rtd",
"paramLongName": "retryDelay", "paramLongName": "retryDelay",
"paramDescription": "the delay (s) between retries", "paramDescription": "the delay (s) between retries",
"paramRequired": false "paramRequired": false
}, },
{ {
"paramName": "ct", "paramName": "cto",
"paramLongName": "connectTimeOut", "paramLongName": "connectTimeOut",
"paramDescription": "the maximum allowed time (s) to connect to the remote host", "paramDescription": "the maximum allowed time (s) to connect to the remote host",
"paramRequired": false "paramRequired": false
}, },
{ {
"paramName": "rt", "paramName": "rto",
"paramLongName": "readTimeOut", "paramLongName": "readTimeOut",
"paramDescription": "the maximum allowed time (s) to receive content from the remote host", "paramDescription": "the maximum allowed time (s) to receive content from the remote host",
"paramRequired": false "paramRequired": false

View File

@ -95,6 +95,7 @@
<arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg> <arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg>
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg> <arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
<arg>--maxNumberOfRetry</arg><arg>${maxNumberOfRetry}</arg> <arg>--maxNumberOfRetry</arg><arg>${maxNumberOfRetry}</arg>
<arg>--requestDelay</arg><arg>${requestDelay}</arg>
<arg>--retryDelay</arg><arg>${retryDelay}</arg> <arg>--retryDelay</arg><arg>${retryDelay}</arg>
<arg>--connectTimeOut</arg><arg>${connectTimeOut}</arg> <arg>--connectTimeOut</arg><arg>${connectTimeOut}</arg>
<arg>--readTimeOut</arg><arg>${readTimeOut}</arg> <arg>--readTimeOut</arg><arg>${readTimeOut}</arg>

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.aggregation; package eu.dnetlib.dhp.aggregation;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.MDSTORE_DATA_PATH; import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File; import java.io.File;

View File

@ -8,9 +8,9 @@ import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.CollectorPluginFactory; import eu.dnetlib.dhp.collection.worker.CollectorPluginFactory;
import eu.dnetlib.dhp.collection.worker.HttpClientParams; import eu.dnetlib.dhp.collection.worker.HttpClientParams;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
@Disabled @Disabled
public class CollectorWorkerApplicationTests { public class CollectorWorkerApplicationTests {

View File

@ -1,14 +1,12 @@
package eu.dnetlib.dhp.transformation; package eu.dnetlib.dhp.transformation;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.MDSTORE_DATA_PATH; import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.lenient;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Collections;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;