forked from D-Net/dnet-hadoop
Added requestDelay to the HttpConnector2 configuration; moved the aggregation workflow constants to dhp-common
This commit is contained in:
parent
40df0f987d
commit
50add4c61b
|
@ -1,5 +1,5 @@
|
|||
|
||||
package eu.dnetlib.dhp.collector.worker.model;
|
||||
package eu.dnetlib.dhp.collection;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
|
@ -27,4 +27,24 @@ public class Constants {
|
|||
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
|
||||
}
|
||||
|
||||
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
|
||||
public static final String REPORT_FILE_NAME = "/report";
|
||||
public static final String MDSTORE_DATA_PATH = "/store";
|
||||
public static final String MDSTORE_SIZE_PATH = "/size";
|
||||
|
||||
public static final String COLLECTION_MODE = "collectionMode";
|
||||
public static final String METADATA_ENCODING = "metadataEncoding";
|
||||
public static final String OOZIE_WF_PATH = "oozieWfPath";
|
||||
public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
|
||||
|
||||
public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
|
||||
public static final String REQUEST_DELAY = "requestDelay";
|
||||
public static final String RETRY_DELAY = "retryDelay";
|
||||
public static final String CONNECT_TIMEOUT = "connectTimeOut";
|
||||
public static final String READ_TIMEOUT = "readTimeOut";
|
||||
|
||||
public static final String CONTENT_TOTALITEMS = "TotalItems";
|
||||
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
||||
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
||||
|
||||
}
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.aggregation.common;
|
||||
|
||||
public class AggregationConstants {
|
||||
|
||||
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
|
||||
public static final String REPORT_FILE_NAME = "/report";
|
||||
public static final String MDSTORE_DATA_PATH = "/store";
|
||||
public static final String MDSTORE_SIZE_PATH = "/size";
|
||||
|
||||
public static final String CONTENT_TOTALITEMS = "TotalItems";
|
||||
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
||||
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
||||
|
||||
}
|
|
@ -1,8 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.aggregation.mdstore;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
|
||||
import static eu.dnetlib.dhp.application.ApplicationUtils.*;
|
||||
import static eu.dnetlib.dhp.common.Constants.*;
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||
|
||||
import java.net.URI;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
|
||||
import static eu.dnetlib.dhp.common.Constants.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||
|
||||
|
|
|
@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
|
|||
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.worker.CollectorException;
|
||||
import eu.dnetlib.dhp.collection.worker.CollectorPluginReport;
|
||||
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
|
||||
|
||||
public interface CollectorPlugin {
|
||||
|
||||
|
|
|
@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
|
|||
import com.google.common.collect.Iterators;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.collection.worker.CollectorException;
|
||||
import eu.dnetlib.dhp.collection.worker.CollectorPluginReport;
|
||||
import eu.dnetlib.dhp.collection.worker.HttpClientParams;
|
||||
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
|
||||
|
||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.worker;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.SEQUENCE_FILE_NAME;
|
||||
import static eu.dnetlib.dhp.common.Constants.SEQUENCE_FILE_NAME;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
@ -15,8 +15,8 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.message.MessageSender;
|
||||
|
||||
public class CollectorWorker {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.worker;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
|
||||
import static eu.dnetlib.dhp.common.Constants.*;
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -17,7 +17,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.message.MessageSender;
|
||||
|
||||
/**
|
||||
|
@ -55,7 +55,7 @@ public class CollectorWorkerApplication {
|
|||
final String mdStoreVersion = argumentParser.get("mdStoreVersion");
|
||||
log.info("mdStoreVersion is {}", mdStoreVersion);
|
||||
|
||||
final String dnetMessageManagerURL = argumentParser.get("dnetMessageManagerURL");
|
||||
final String dnetMessageManagerURL = argumentParser.get(DNET_MESSAGE_MGR_URL);
|
||||
log.info("dnetMessageManagerURL is {}", dnetMessageManagerURL);
|
||||
|
||||
final String workflowId = argumentParser.get("workflowId");
|
||||
|
@ -87,15 +87,23 @@ public class CollectorWorkerApplication {
|
|||
clientParams
|
||||
.setMaxNumberOfRetry(
|
||||
Optional
|
||||
.ofNullable(argumentParser.get("maxNumberOfRetry"))
|
||||
.ofNullable(argumentParser.get(MAX_NUMBER_OF_RETRY))
|
||||
.map(Integer::parseInt)
|
||||
.orElse(HttpClientParams._maxNumberOfRetry));
|
||||
log.info("maxNumberOfRetry is {}", clientParams.getMaxNumberOfRetry());
|
||||
|
||||
clientParams
|
||||
.setRequestDelay(
|
||||
Optional
|
||||
.ofNullable(argumentParser.get(REQUEST_DELAY))
|
||||
.map(Integer::parseInt)
|
||||
.orElse(HttpClientParams._requestDelay));
|
||||
log.info("requestDelay is {}", clientParams.getRequestDelay());
|
||||
|
||||
clientParams
|
||||
.setRetryDelay(
|
||||
Optional
|
||||
.ofNullable(argumentParser.get("retryDelay"))
|
||||
.ofNullable(argumentParser.get(RETRY_DELAY))
|
||||
.map(Integer::parseInt)
|
||||
.orElse(HttpClientParams._retryDelay));
|
||||
log.info("retryDelay is {}", clientParams.getRetryDelay());
|
||||
|
@ -103,7 +111,7 @@ public class CollectorWorkerApplication {
|
|||
clientParams
|
||||
.setConnectTimeOut(
|
||||
Optional
|
||||
.ofNullable(argumentParser.get("connectTimeOut"))
|
||||
.ofNullable(argumentParser.get(CONNECT_TIMEOUT))
|
||||
.map(Integer::parseInt)
|
||||
.orElse(HttpClientParams._connectTimeOut));
|
||||
log.info("connectTimeOut is {}", clientParams.getConnectTimeOut());
|
||||
|
@ -111,7 +119,7 @@ public class CollectorWorkerApplication {
|
|||
clientParams
|
||||
.setReadTimeOut(
|
||||
Optional
|
||||
.ofNullable(argumentParser.get("readTimeOut"))
|
||||
.ofNullable(argumentParser.get(READ_TIMEOUT))
|
||||
.map(Integer::parseInt)
|
||||
.orElse(HttpClientParams._readTimeOut));
|
||||
log.info("readTimeOut is {}", clientParams.getReadTimeOut());
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.collection.worker;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.REPORT_FILE_NAME;
|
||||
import static eu.dnetlib.dhp.common.Constants.REPORT_FILE_NAME;
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||
|
||||
import java.io.IOException;
|
||||
|
|
|
@ -6,22 +6,46 @@ package eu.dnetlib.dhp.collection.worker;
|
|||
*/
|
||||
public class HttpClientParams {
|
||||
|
||||
// Defaults
|
||||
public static int _maxNumberOfRetry = 3;
|
||||
public static int _requestDelay = 0; // milliseconds
|
||||
public static int _retryDelay = 10; // seconds
|
||||
public static int _connectTimeOut = 10; // seconds
|
||||
public static int _readTimeOut = 30; // seconds
|
||||
|
||||
/**
|
||||
* Maximum number of allowed retries before failing
|
||||
*/
|
||||
private int maxNumberOfRetry;
|
||||
|
||||
/**
|
||||
* Delay between requests (Milliseconds)
|
||||
*/
|
||||
private int requestDelay;
|
||||
|
||||
/**
|
||||
* Time to wait after a failure before retrying (Seconds)
|
||||
*/
|
||||
private int retryDelay;
|
||||
|
||||
/**
|
||||
* Connect timeout (Seconds)
|
||||
*/
|
||||
private int connectTimeOut;
|
||||
|
||||
/**
|
||||
* Read timeout (Seconds)
|
||||
*/
|
||||
private int readTimeOut;
|
||||
|
||||
public HttpClientParams() {
|
||||
this(_maxNumberOfRetry, _retryDelay, _connectTimeOut, _readTimeOut);
|
||||
this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
|
||||
}
|
||||
|
||||
public HttpClientParams(int maxNumberOfRetry, int retryDelay, int connectTimeOut, int readTimeOut) {
|
||||
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
|
||||
int readTimeOut) {
|
||||
this.maxNumberOfRetry = maxNumberOfRetry;
|
||||
this.requestDelay = requestDelay;
|
||||
this.retryDelay = retryDelay;
|
||||
this.connectTimeOut = connectTimeOut;
|
||||
this.readTimeOut = readTimeOut;
|
||||
|
@ -35,6 +59,14 @@ public class HttpClientParams {
|
|||
this.maxNumberOfRetry = maxNumberOfRetry;
|
||||
}
|
||||
|
||||
public int getRequestDelay() {
|
||||
return requestDelay;
|
||||
}
|
||||
|
||||
public void setRequestDelay(int requestDelay) {
|
||||
this.requestDelay = requestDelay;
|
||||
}
|
||||
|
||||
public int getRetryDelay() {
|
||||
return retryDelay;
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
|
|||
/**
|
||||
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
||||
*
|
||||
* @author jochen, michele, andrea, alessia
|
||||
* @author jochen, michele, andrea, alessia, claudio
|
||||
*/
|
||||
public class HttpConnector2 {
|
||||
|
||||
|
@ -83,14 +83,22 @@ public class HttpConnector2 {
|
|||
final CollectorPluginReport report) throws CollectorException, IOException {
|
||||
|
||||
if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
|
||||
throw new CollectorException("Max number of retries exceeded. Cause: \n " + report);
|
||||
final String msg = String
|
||||
.format(
|
||||
"Max number of retries (%s/%s) exceeded, failing.",
|
||||
retryNumber, getClientParams().getMaxNumberOfRetry());
|
||||
log.error(msg);
|
||||
throw new CollectorException(msg);
|
||||
}
|
||||
|
||||
log.info("Downloading attempt {} [{}]", retryNumber, requestUrl);
|
||||
log.info("Request attempt {} [{}]", retryNumber, requestUrl);
|
||||
|
||||
InputStream input = null;
|
||||
|
||||
try {
|
||||
if (getClientParams().getRequestDelay() > 0) {
|
||||
backoffAndSleep(getClientParams().getRequestDelay());
|
||||
}
|
||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
||||
urlConn.setInstanceFollowRedirects(false);
|
||||
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
|
||||
|
@ -190,10 +198,10 @@ public class HttpConnector2 {
|
|||
}
|
||||
}
|
||||
|
||||
private void backoffAndSleep(int sleepTime) throws CollectorException {
|
||||
log.info("I'm going to sleep for {}ms", sleepTime);
|
||||
private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
|
||||
log.info("I'm going to sleep for {}ms", sleepTimeMs);
|
||||
try {
|
||||
Thread.sleep(sleepTime);
|
||||
Thread.sleep(sleepTimeMs);
|
||||
} catch (InterruptedException e) {
|
||||
log.error(e.getMessage(), e);
|
||||
throw new CollectorException(e);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.transformation;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
|
||||
import static eu.dnetlib.dhp.common.Constants.*;
|
||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||
|
||||
|
|
|
@ -30,25 +30,31 @@
|
|||
"paramRequired": true
|
||||
},
|
||||
{
|
||||
"paramName": "mr",
|
||||
"paramName": "mnr",
|
||||
"paramLongName": "maxNumberOfRetry",
|
||||
"paramDescription": "the maximum number of admitted connection retries",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "rd",
|
||||
"paramName": "rqd",
|
||||
"paramLongName": "requestDelay",
|
||||
"paramDescription": "the delay (ms) between requests",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "rtd",
|
||||
"paramLongName": "retryDelay",
|
||||
"paramDescription": "the delay (s) between retries",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "ct",
|
||||
"paramName": "cto",
|
||||
"paramLongName": "connectTimeOut",
|
||||
"paramDescription": "the maximum allowed time (s) to connect to the remote host",
|
||||
"paramRequired": false
|
||||
},
|
||||
{
|
||||
"paramName": "rt",
|
||||
"paramName": "rto",
|
||||
"paramLongName": "readTimeOut",
|
||||
"paramDescription": "the maximum allowed time (s) to receive content from the remote host",
|
||||
"paramRequired": false
|
||||
|
|
|
@ -95,6 +95,7 @@
|
|||
<arg>--dnetMessageManagerURL</arg><arg>${dnetMessageManagerURL}</arg>
|
||||
<arg>--mdStoreVersion</arg><arg>${wf:actionData('StartTransaction')['mdStoreVersion']}</arg>
|
||||
<arg>--maxNumberOfRetry</arg><arg>${maxNumberOfRetry}</arg>
|
||||
<arg>--requestDelay</arg><arg>${requestDelay}</arg>
|
||||
<arg>--retryDelay</arg><arg>${retryDelay}</arg>
|
||||
<arg>--connectTimeOut</arg><arg>${connectTimeOut}</arg>
|
||||
<arg>--readTimeOut</arg><arg>${readTimeOut}</arg>
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
package eu.dnetlib.dhp.aggregation;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.MDSTORE_DATA_PATH;
|
||||
import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
import java.io.File;
|
||||
|
|
|
@ -8,9 +8,9 @@ import org.junit.jupiter.api.Test;
|
|||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||
import eu.dnetlib.dhp.collection.worker.CollectorPluginFactory;
|
||||
import eu.dnetlib.dhp.collection.worker.HttpClientParams;
|
||||
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
|
||||
|
||||
@Disabled
|
||||
public class CollectorWorkerApplicationTests {
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
|
||||
package eu.dnetlib.dhp.transformation;
|
||||
|
||||
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.MDSTORE_DATA_PATH;
|
||||
import static eu.dnetlib.dhp.common.Constants.MDSTORE_DATA_PATH;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.mockito.Mockito.lenient;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
|
Loading…
Reference in New Issue