added requestDelay to HttpConnector2 configuration; Aggregation workflow constants moved in dhp-common

This commit is contained in:
Claudio Atzori 2021-02-08 12:19:38 +01:00
parent 40df0f987d
commit 50add4c61b
18 changed files with 106 additions and 49 deletions

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collector.worker.model;
package eu.dnetlib.dhp.collection;
import java.util.HashMap;
import java.util.Map;

View File

@ -27,4 +27,24 @@ public class Constants {
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
}
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
public static final String REPORT_FILE_NAME = "/report";
public static final String MDSTORE_DATA_PATH = "/store";
public static final String MDSTORE_SIZE_PATH = "/size";
public static final String COLLECTION_MODE = "collectionMode";
public static final String METADATA_ENCODING = "metadataEncoding";
public static final String OOZIE_WF_PATH = "oozieWfPath";
public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
public static final String REQUEST_DELAY = "requestDelay";
public static final String RETRY_DELAY = "retryDelay";
public static final String CONNECT_TIMEOUT = "connectTimeOut";
public static final String READ_TIMEOUT = "readTimeOut";
public static final String CONTENT_TOTALITEMS = "TotalItems";
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
}

View File

@ -1,15 +0,0 @@
package eu.dnetlib.dhp.aggregation.common;
public class AggregationConstants {
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
public static final String REPORT_FILE_NAME = "/report";
public static final String MDSTORE_DATA_PATH = "/store";
public static final String MDSTORE_SIZE_PATH = "/size";
public static final String CONTENT_TOTALITEMS = "TotalItems";
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
}

View File

@ -1,8 +1,7 @@
package eu.dnetlib.dhp.aggregation.mdstore;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.application.ApplicationUtils.*;
import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.net.URI;

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.*;

View File

@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
import java.util.stream.Stream;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.CollectorPluginReport;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
public interface CollectorPlugin {

View File

@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.CollectorPluginReport;
import eu.dnetlib.dhp.collection.worker.HttpClientParams;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
public class OaiCollectorPlugin implements CollectorPlugin {

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.SEQUENCE_FILE_NAME;
import static eu.dnetlib.dhp.common.Constants.SEQUENCE_FILE_NAME;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
@ -15,8 +15,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.message.MessageSender;
public class CollectorWorker {

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException;
@ -17,7 +17,7 @@ import org.slf4j.LoggerFactory;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.message.MessageSender;
/**
@ -55,7 +55,7 @@ public class CollectorWorkerApplication {
final String mdStoreVersion = argumentParser.get("mdStoreVersion");
log.info("mdStoreVersion is {}", mdStoreVersion);
final String dnetMessageManagerURL = argumentParser.get("dnetMessageManagerURL");
final String dnetMessageManagerURL = argumentParser.get(DNET_MESSAGE_MGR_URL);
log.info("dnetMessageManagerURL is {}", dnetMessageManagerURL);
final String workflowId = argumentParser.get("workflowId");
@ -87,15 +87,23 @@ public class CollectorWorkerApplication {
clientParams
.setMaxNumberOfRetry(
Optional
.ofNullable(argumentParser.get("maxNumberOfRetry"))
.ofNullable(argumentParser.get(MAX_NUMBER_OF_RETRY))
.map(Integer::parseInt)
.orElse(HttpClientParams._maxNumberOfRetry));
log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry());
clientParams
.setRequestDelay(
Optional
.ofNullable(argumentParser.get(REQUEST_DELAY))
.map(Integer::parseInt)
.orElse(HttpClientParams._requestDelay));
log.info("requestDelay is {}", clientParams.getRequestDelay());
clientParams
.setRetryDelay(
Optional
.ofNullable(argumentParser.get("retryDelay"))
.ofNullable(argumentParser.get(RETRY_DELAY))
.map(Integer::parseInt)
.orElse(HttpClientParams._retryDelay));
log.info("retryDelay is {}", clientParams.getRetryDelay());
@ -103,7 +111,7 @@ public class CollectorWorkerApplication {
clientParams
.setConnectTimeOut(
Optional
.ofNullable(argumentParser.get("connectTimeOut"))
.ofNullable(argumentParser.get(CONNECT_TIMEOUT))
.map(Integer::parseInt)
.orElse(HttpClientParams._connectTimeOut));
log.info("connectTimeOut is {}", clientParams.getConnectTimeOut());
@ -111,7 +119,7 @@ public class CollectorWorkerApplication {
clientParams
.setReadTimeOut(
Optional
.ofNullable(argumentParser.get("readTimeOut"))
.ofNullable(argumentParser.get(READ_TIMEOUT))
.map(Integer::parseInt)
.orElse(HttpClientParams._readTimeOut));
log.info("readTimeOut is {}", clientParams.getReadTimeOut());

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.REPORT_FILE_NAME;
import static eu.dnetlib.dhp.common.Constants.REPORT_FILE_NAME;
import static eu.dnetlib.dhp.utils.DHPUtils.*;
import java.io.IOException;

View File

@ -6,22 +6,46 @@ package eu.dnetlib.dhp.collection.worker;
*/
public class HttpClientParams {
// Defaults
public static int _maxNumberOfRetry = 3;
public static int _requestDelay = 0; // milliseconds
public static int _retryDelay = 10; // seconds
public static int _connectTimeOut = 10; // seconds
public static int _readTimeOut = 30; // seconds
/**
* Maximum number of allowed retires before failing
*/
private int maxNumberOfRetry;
/**
* Delay between request (Milliseconds)
*/
private int requestDelay;
/**
* Time to wait after a failure before retrying (Seconds)
*/
private int retryDelay;
/**
* Connect timeout (Seconds)
*/
private int connectTimeOut;
/**
* Read timeout (Seconds)
*/
private int readTimeOut;
public HttpClientParams() {
this(_maxNumberOfRetry, _retryDelay, _connectTimeOut, _readTimeOut);
this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
}
public HttpClientParams(int maxNumberOfRetry, int retryDelay, int connectTimeOut, int readTimeOut) {
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
int readTimeOut) {
this.maxNumberOfRetry = maxNumberOfRetry;
this.requestDelay = requestDelay;
this.retryDelay = retryDelay;
this.connectTimeOut = connectTimeOut;
this.readTimeOut = readTimeOut;
@ -35,6 +59,14 @@ public class HttpClientParams {
this.maxNumberOfRetry = maxNumberOfRetry;
}
public int getRequestDelay() {
return requestDelay;
}
public void setRequestDelay(int requestDelay) {
this.requestDelay = requestDelay;
}
public int getRetryDelay() {
return retryDelay;
}

View File

@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
/**
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
*
* @author jochen, michele, andrea, alessia
* @author jochen, michele, andrea, alessia, claudio
*/
public class HttpConnector2 {
@ -83,14 +83,22 @@ public class HttpConnector2 {
final CollectorPluginReport report) throws CollectorException, IOException {
if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
throw new CollectorException("Max number of retries exceeded. Cause: \n " + report);
final String msg = String
.format(
"Max number of retries (%s/%s) exceeded, failing.",
retryNumber, getClientParams().getMaxNumberOfRetry());
log.error(msg);
throw new CollectorException(msg);
}
log.info("Downloading attempt {} [{}]", retryNumber, requestUrl);
log.info("Request attempt {} [{}]", retryNumber, requestUrl);
InputStream input = null;
try {
if (getClientParams().getRequestDelay() > 0) {
backoffAndSleep(getClientParams().getRequestDelay());
}
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setInstanceFollowRedirects(false);
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
@ -190,10 +198,10 @@ public class HttpConnector2 {
}
}
private void backoffAndSleep(int sleepTime) throws CollectorException {
log.info("I'm going to sleep for {}ms", sleepTime);
private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
log.info("I'm going to sleep for {}ms", sleepTimeMs);
try {
Thread.sleep(sleepTime);
Thread.sleep(sleepTimeMs);
} catch (InterruptedException e) {
log.error(e.getMessage(), e);
throw new CollectorException(e);

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.transformation;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.common.Constants.*;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.utils.DHPUtils.*;

View File

@ -30,25 +30,31 @@
"paramRequired": true
},
{
"paramName": "mr",
"paramName": "mnr",
"paramLongName": "maxNumberOfRetry",
"paramDescription": "the maximum number of admitted connection retries",
"paramRequired": false
},
{
"paramName": "rd",
"paramName": "rqd",
"paramLongName": "requestDelay",
"paramDescription": "the delay (ms) between requests",
"paramRequired": false
},
{
"paramName": "rtd",
"paramLongName": "retryDelay",
"paramDescription": "the delay (ms) between retries",
"paramRequired": false
},
{
"paramName": "ct",
"paramName": "cto",
"paramLongName": "connectTimeOut",
"paramDescription": "the maximum allowed time (ms) to connect to the remote host",
"paramRequired": false
},
{
"paramName": "rt",
"paramName": "rto",
"paramLongName": "readTimeOut",
"paramDescription": "the maximum allowed time (ms) to receive content from the remote host",
"paramRequired": false

View File

@ -95,6 +95,7 @@
<arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg>
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
<arg>--maxNumberOfRetry</arg><arg>${maxNumberOfRetry}</arg>
<arg>--requestDelay</arg><arg>${requestDelay}</arg>
<arg>--retryDelay</arg><arg>${retryDelay}</arg>
<arg>--connectTimeOut</arg><arg>${connectTimeOut}</arg>
<arg>--readTimeOut</arg><arg>${readTimeOut}</arg>

View File

@ -1,7 +1,7 @@
package eu.dnetlib.dhp.aggregation;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.MDSTORE_DATA_PATH;
import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File;

View File

@ -8,9 +8,9 @@ import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.CollectorPluginFactory;
import eu.dnetlib.dhp.collection.worker.HttpClientParams;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
@Disabled
public class CollectorWorkerApplicationTests {

View File

@ -1,14 +1,12 @@
package eu.dnetlib.dhp.transformation;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.MDSTORE_DATA_PATH;
import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.mockito.Mockito.lenient;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;