better logging, WIP: collectorWorker error reporting

This commit is contained in:
Claudio Atzori 2021-02-03 12:33:41 +01:00
parent 53884d12c2
commit 0e8a4f9f1a
11 changed files with 159 additions and 136 deletions

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.application;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Properties;
/**
 * Helpers shared by the applications that are launched as Oozie actions.
 */
public class ApplicationUtils {

	/**
	 * Stores a single {@code paramName=value} entry in the properties file whose path is read from
	 * the {@code oozie.action.output.properties} system property.
	 *
	 * <p>The file is opened without append mode, so any previous content is replaced: after the
	 * call the file holds only the entry written here.
	 *
	 * @param paramName the property key to write
	 * @param value the property value to write
	 * @throws NullPointerException if the {@code oozie.action.output.properties} system property is
	 *         not set, or if {@code paramName} or {@code value} is {@code null}
	 * @throws Exception if the output file cannot be opened or written
	 */
	public static void populateOOZIEEnv(final String paramName, String value) throws Exception {
		final String outputPath = System.getProperty("oozie.action.output.properties");
		if (outputPath == null) {
			// new File(null) would raise the same exception type, but without saying what is missing
			throw new NullPointerException("system property 'oozie.action.output.properties' is not set");
		}

		final Properties props = new Properties();
		props.setProperty(paramName, value);

		// try-with-resources: the stream is closed even when store() fails
		try (OutputStream os = new FileOutputStream(new File(outputPath))) {
			props.store(os, "");
		}
	}
}

View File

@ -1,6 +1,9 @@
package eu.dnetlib.dhp.aggregation.mdstore; package eu.dnetlib.dhp.aggregation.mdstore;
import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*;
import static eu.dnetlib.dhp.application.ApplicationUtils.*;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.OutputStream; import java.io.OutputStream;
@ -16,11 +19,8 @@ import org.apache.hadoop.fs.Path;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.worker.CollectorWorker;
import eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.common.rest.DNetRestClient;
public class MDStoreActionNode { public class MDStoreActionNode {
@ -28,11 +28,8 @@ public class MDStoreActionNode {
enum MDAction { enum MDAction {
NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
} }
private static final ObjectMapper mapper = new ObjectMapper();
public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
@ -48,13 +45,13 @@ public class MDStoreActionNode {
final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser( final ArgumentApplicationParser argumentParser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
CollectorWorker.class MDStoreActionNode.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/collection/mdstore_action_parameters.json"))); "/eu/dnetlib/dhp/collection/mdstore_action_parameters.json")));
argumentParser.parseArgument(args); argumentParser.parseArgument(args);
final MDAction action = MDAction.valueOf(argumentParser.get("action")); final MDAction action = MDAction.valueOf(argumentParser.get("action"));
log.info("Curren action is {}", action); log.info("Current action is {}", action);
final String mdStoreManagerURI = argumentParser.get("mdStoreManagerURI"); final String mdStoreManagerURI = argumentParser.get("mdStoreManagerURI");
log.info("mdStoreManagerURI is {}", mdStoreManagerURI); log.info("mdStoreManagerURI is {}", mdStoreManagerURI);
@ -67,7 +64,7 @@ public class MDStoreActionNode {
} }
final MDStoreVersion currentVersion = DNetRestClient final MDStoreVersion currentVersion = DNetRestClient
.doGET(String.format(NEW_VERSION_URI, mdStoreManagerURI, mdStoreID), MDStoreVersion.class); .doGET(String.format(NEW_VERSION_URI, mdStoreManagerURI, mdStoreID), MDStoreVersion.class);
populateOOZIEEnv(MDSTOREVERSIONPARAM, mapper.writeValueAsString(currentVersion)); populateOOZIEEnv(MDSTOREVERSIONPARAM, MAPPER.writeValueAsString(currentVersion));
break; break;
} }
case COMMIT: { case COMMIT: {
@ -77,7 +74,7 @@ public class MDStoreActionNode {
throw new IllegalArgumentException("missing or empty argument namenode"); throw new IllegalArgumentException("missing or empty argument namenode");
} }
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get("mdStoreVersion");
final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
@ -110,7 +107,7 @@ public class MDStoreActionNode {
} }
case ROLLBACK: { case ROLLBACK: {
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get("mdStoreVersion");
final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
@ -127,12 +124,12 @@ public class MDStoreActionNode {
} }
final MDStoreVersion currentVersion = DNetRestClient final MDStoreVersion currentVersion = DNetRestClient
.doGET(String.format(READ_LOCK_URL, mdStoreManagerURI, mdStoreID), MDStoreVersion.class); .doGET(String.format(READ_LOCK_URL, mdStoreManagerURI, mdStoreID), MDStoreVersion.class);
populateOOZIEEnv(MDSTOREREADLOCKPARAM, mapper.writeValueAsString(currentVersion)); populateOOZIEEnv(MDSTOREREADLOCKPARAM, MAPPER.writeValueAsString(currentVersion));
break; break;
} }
case READ_UNLOCK: { case READ_UNLOCK: {
final String mdStoreVersion_params = argumentParser.get("readMDStoreId"); final String mdStoreVersion_params = argumentParser.get("readMDStoreId");
final MDStoreVersion mdStoreVersion = mapper.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {
throw new IllegalArgumentException( throw new IllegalArgumentException(
@ -148,13 +145,4 @@ public class MDStoreActionNode {
} }
public static void populateOOZIEEnv(final String paramName, String value) throws Exception {
File file = new File(System.getProperty("oozie.action.output.properties"));
Properties props = new Properties();
props.setProperty(paramName, value);
OutputStream os = new FileOutputStream(file);
props.store(os, "");
os.close();
}
} }

View File

@ -4,9 +4,12 @@ package eu.dnetlib.dhp.collection.plugin;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.collection.worker.CollectorException; import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
public interface CollectorPlugin { public interface CollectorPlugin {
Stream<String> collect(ApiDescriptor api) throws CollectorException; Stream<String> collect(ApiDescriptor api) throws CollectorException;
CollectorPluginErrorLogList getCollectionErrors();
} }

View File

@ -9,12 +9,15 @@ import java.util.Spliterators;
import java.util.stream.Stream; import java.util.stream.Stream;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import org.jetbrains.annotations.NotNull;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.CollectorException; import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
public class OaiCollectorPlugin implements CollectorPlugin { public class OaiCollectorPlugin implements CollectorPlugin {
@ -26,8 +29,19 @@ public class OaiCollectorPlugin implements CollectorPlugin {
private OaiIteratorFactory oaiIteratorFactory; private OaiIteratorFactory oaiIteratorFactory;
private final CollectorPluginErrorLogList errorLogList = new CollectorPluginErrorLogList();
@Override @Override
public Stream<String> collect(final ApiDescriptor api) throws CollectorException { public Stream<String> collect(final ApiDescriptor api) throws CollectorException {
try {
return doCollect(api);
} catch (CollectorException e) {
errorLogList.add(e.getMessage());
throw e;
}
}
private Stream<String> doCollect(ApiDescriptor api) throws CollectorException {
final String baseUrl = api.getBaseUrl(); final String baseUrl = api.getBaseUrl();
final String mdFormat = api.getParams().get(FORMAT_PARAM); final String mdFormat = api.getParams().get(FORMAT_PARAM);
final String setParam = api.getParams().get(OAI_SET_PARAM); final String setParam = api.getParams().get(OAI_SET_PARAM);
@ -65,7 +79,7 @@ public class OaiCollectorPlugin implements CollectorPlugin {
.stream() .stream()
.map( .map(
set -> getOaiIteratorFactory() set -> getOaiIteratorFactory()
.newIterator(baseUrl, mdFormat, set, fromDate, untilDate)) .newIterator(baseUrl, mdFormat, set, fromDate, untilDate, errorLogList))
.iterator(); .iterator();
return StreamSupport return StreamSupport
@ -79,4 +93,9 @@ public class OaiCollectorPlugin implements CollectorPlugin {
} }
return oaiIteratorFactory; return oaiIteratorFactory;
} }
/**
 * Exposes the error log kept by this plugin instance: the list that {@code collect} appends
 * the {@code CollectorException} message to, and that is handed to the OAI iterators it creates.
 *
 * @return the plugin's error log list (the live instance, not a copy)
 */
@Override
public CollectorPluginErrorLogList getCollectionErrors() {
	return this.errorLogList;
}
} }

View File

@ -15,15 +15,17 @@ import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.Node; import org.dom4j.Node;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.worker.CollectorException; import eu.dnetlib.dhp.collection.worker.CollectorException;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector; import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner; import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
public class OaiIterator implements Iterator<String> { public class OaiIterator implements Iterator<String> {
private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
// 11/24/08 5:02 PM
private final Queue<String> queue = new PriorityBlockingQueue<>(); private final Queue<String> queue = new PriorityBlockingQueue<>();
private final SAXReader reader = new SAXReader(); private final SAXReader reader = new SAXReader();
@ -36,6 +38,7 @@ public class OaiIterator implements Iterator<String> {
private String token; private String token;
private boolean started; private boolean started;
private final HttpConnector httpConnector; private final HttpConnector httpConnector;
private CollectorPluginErrorLogList errorLogList;
public OaiIterator( public OaiIterator(
final String baseUrl, final String baseUrl,
@ -43,7 +46,8 @@ public class OaiIterator implements Iterator<String> {
final String set, final String set,
final String fromDate, final String fromDate,
final String untilDate, final String untilDate,
final HttpConnector httpConnector) { final HttpConnector httpConnector,
final CollectorPluginErrorLogList errorLogList) {
this.baseUrl = baseUrl; this.baseUrl = baseUrl;
this.mdFormat = mdFormat; this.mdFormat = mdFormat;
this.set = set; this.set = set;
@ -51,6 +55,7 @@ public class OaiIterator implements Iterator<String> {
this.untilDate = untilDate; this.untilDate = untilDate;
this.started = false; this.started = false;
this.httpConnector = httpConnector; this.httpConnector = httpConnector;
this.errorLogList = errorLogList;
} }
private void verifyStarted() { private void verifyStarted() {
@ -139,7 +144,7 @@ public class OaiIterator implements Iterator<String> {
private String downloadPage(final String url) throws CollectorException { private String downloadPage(final String url) throws CollectorException {
final String xml = httpConnector.getInputSource(url); final String xml = httpConnector.getInputSource(url, errorLogList);
Document doc; Document doc;
try { try {
doc = reader.read(new StringReader(xml)); doc = reader.read(new StringReader(xml));
@ -174,4 +179,8 @@ public class OaiIterator implements Iterator<String> {
return doc.valueOf("//*[local-name()='resumptionToken']"); return doc.valueOf("//*[local-name()='resumptionToken']");
} }
/**
 * Returns the error log list this iterator was constructed with, i.e. the same instance it
 * passes to the HTTP connector when downloading a page.
 *
 * @return the error log list given to the constructor (the live instance, not a copy)
 */
public CollectorPluginErrorLogList getErrorLogList() {
	return this.errorLogList;
}
} }

View File

@ -3,6 +3,7 @@ package eu.dnetlib.dhp.collection.plugin.oai;
import java.util.Iterator; import java.util.Iterator;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector; import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
public class OaiIteratorFactory { public class OaiIteratorFactory {
@ -14,8 +15,9 @@ public class OaiIteratorFactory {
final String mdFormat, final String mdFormat,
final String set, final String set,
final String fromDate, final String fromDate,
final String untilDate) { final String untilDate,
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector()); final CollectorPluginErrorLogList errorLogList) {
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector(), errorLogList);
} }
private HttpConnector getHttpConnector() { private HttpConnector getHttpConnector() {

View File

@ -15,6 +15,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
@ -22,69 +23,65 @@ public class CollectorWorker {
private static final Logger log = LoggerFactory.getLogger(CollectorWorker.class); private static final Logger log = LoggerFactory.getLogger(CollectorWorker.class);
private final CollectorPluginFactory collectorPluginFactory;
private final ApiDescriptor api; private final ApiDescriptor api;
private final String hdfsuri; private final String hdfsuri;
private final String hdfsPath; private final String hdfsPath;
private CollectorPlugin plugin;
public CollectorWorker( public CollectorWorker(
final CollectorPluginFactory collectorPluginFactory,
final ApiDescriptor api, final ApiDescriptor api,
final String hdfsuri, final String hdfsuri,
final String hdfsPath) { final String hdfsPath) throws CollectorException {
this.collectorPluginFactory = collectorPluginFactory;
this.api = api; this.api = api;
this.hdfsuri = hdfsuri; this.hdfsuri = hdfsuri;
this.hdfsPath = hdfsPath; this.hdfsPath = hdfsPath;
this.plugin = CollectorPluginFactory.getPluginByProtocol(api.getProtocol());
} }
public void collect() throws CollectorException { public CollectorPluginErrorLogList collect() throws IOException, CollectorException {
try {
final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol());
// ====== Init HDFS File System Object // ====== Init HDFS File System Object
Configuration conf = new Configuration(); Configuration conf = new Configuration();
// Set FileSystem URI // Set FileSystem URI
conf.set("fs.defaultFS", hdfsuri); conf.set("fs.defaultFS", hdfsuri);
// Because of Maven // Because of Maven
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
System.setProperty("hadoop.home.dir", "/"); System.setProperty("hadoop.home.dir", "/");
// Get the filesystem - HDFS // Get the filesystem - HDFS
FileSystem.get(URI.create(hdfsuri), conf);
Path hdfswritepath = new Path(hdfsPath);
log.info("Created path " + hdfswritepath.toString()); FileSystem.get(URI.create(hdfsuri), conf);
Path hdfswritepath = new Path(hdfsPath);
final AtomicInteger counter = new AtomicInteger(0); log.info("Created path " + hdfswritepath.toString());
try (SequenceFile.Writer writer = SequenceFile
.createWriter( final AtomicInteger counter = new AtomicInteger(0);
conf, try (SequenceFile.Writer writer = SequenceFile
SequenceFile.Writer.file(hdfswritepath), .createWriter(
SequenceFile.Writer.keyClass(IntWritable.class), conf,
SequenceFile.Writer.valueClass(Text.class))) { SequenceFile.Writer.file(hdfswritepath),
final IntWritable key = new IntWritable(counter.get()); SequenceFile.Writer.keyClass(IntWritable.class),
final Text value = new Text(); SequenceFile.Writer.valueClass(Text.class))) {
plugin final IntWritable key = new IntWritable(counter.get());
.collect(api) final Text value = new Text();
.forEach( plugin
content -> { .collect(api)
key.set(counter.getAndIncrement()); .forEach(
value.set(content); content -> {
try { key.set(counter.getAndIncrement());
writer.append(key, value); value.set(content);
} catch (IOException e) { try {
throw new RuntimeException(e); writer.append(key, value);
} } catch (IOException e) {
}); throw new RuntimeException(e);
} }
} catch (Throwable e) { });
throw new CollectorException("Error on collecting ", e); } finally {
return plugin.getCollectionErrors();
} }
} }
} }

View File

@ -2,6 +2,8 @@
package eu.dnetlib.dhp.collection.worker; package eu.dnetlib.dhp.collection.worker;
import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*; import static eu.dnetlib.dhp.aggregation.common.AggregationConstants.*;
import static eu.dnetlib.dhp.aggregation.common.AggregationUtility.*;
import static eu.dnetlib.dhp.application.ApplicationUtils.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -10,7 +12,9 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion; import eu.dnetlib.data.mdstore.manager.common.model.MDStoreVersion;
import eu.dnetlib.dhp.aggregation.common.AggregationUtility;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginErrorLogList;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor;
@ -25,8 +29,6 @@ public class CollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class); private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class);
private static final CollectorPluginFactory collectorPluginFactory = new CollectorPluginFactory();
/** /**
* @param args * @param args
*/ */
@ -49,14 +51,16 @@ public class CollectorWorkerApplication {
final String mdStoreVersion = argumentParser.get("mdStoreVersion"); final String mdStoreVersion = argumentParser.get("mdStoreVersion");
log.info("mdStoreVersion is {}", mdStoreVersion); log.info("mdStoreVersion is {}", mdStoreVersion);
final ObjectMapper jsonMapper = new ObjectMapper(); final MDStoreVersion currentVersion = MAPPER.readValue(mdStoreVersion, MDStoreVersion.class);
final String hdfsPath = currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME;
log.info("hdfs path is {}", hdfsPath);
final MDStoreVersion currentVersion = jsonMapper.readValue(mdStoreVersion, MDStoreVersion.class); final ApiDescriptor api = MAPPER.readValue(apiDescriptor, ApiDescriptor.class);
final ApiDescriptor api = jsonMapper.readValue(apiDescriptor, ApiDescriptor.class); final CollectorWorker worker = new CollectorWorker(api, hdfsuri, hdfsPath);
final CollectorWorker worker = new CollectorWorker(collectorPluginFactory, api, hdfsuri, CollectorPluginErrorLogList errors = worker.collect();
currentVersion.getHdfsPath() + SEQUENCE_FILE_NAME);
worker.collect(); populateOOZIEEnv("collectorErrors", errors.toString());
} }

View File

@ -7,7 +7,7 @@ import eu.dnetlib.dhp.collection.worker.CollectorException;
public class CollectorPluginFactory { public class CollectorPluginFactory {
public CollectorPlugin getPluginByProtocol(final String protocol) throws CollectorException { public static CollectorPlugin getPluginByProtocol(final String protocol) throws CollectorException {
if (protocol == null) if (protocol == null)
throw new CollectorException("protocol cannot be null"); throw new CollectorException("protocol cannot be null");
switch (protocol.toLowerCase().trim()) { switch (protocol.toLowerCase().trim()) {

View File

@ -16,14 +16,14 @@ import javax.net.ssl.X509TrustManager;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils; import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log; import org.slf4j.Logger;
import org.apache.commons.logging.LogFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.collection.worker.CollectorException; import eu.dnetlib.dhp.collection.worker.CollectorException;
public class HttpConnector { public class HttpConnector {
private static final Log log = LogFactory.getLog(HttpConnector.class); private static final Logger log = LoggerFactory.getLogger(HttpConnector.class);
private int maxNumberOfRetry = 6; private int maxNumberOfRetry = 6;
private int defaultDelay = 120; // seconds private int defaultDelay = 120; // seconds
@ -45,7 +45,20 @@ public class HttpConnector {
* @throws CollectorException when retrying more than maxNumberOfRetry times * @throws CollectorException when retrying more than maxNumberOfRetry times
*/ */
public String getInputSource(final String requestUrl) throws CollectorException { public String getInputSource(final String requestUrl) throws CollectorException {
return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList()); return attemptDownloadAsString(requestUrl, 1, new CollectorPluginErrorLogList());
}
/**
* Given the URL returns the content via HTTP GET
*
* @param requestUrl the URL
* @param errorLogList the list of errors
* @return the content of the downloaded resource
* @throws CollectorException when retrying more than maxNumberOfRetry times
*/
public String getInputSource(final String requestUrl, CollectorPluginErrorLogList errorLogList)
throws CollectorException {
return attemptDownloadAsString(requestUrl, 1, errorLogList);
} }
/** /**
@ -59,18 +72,20 @@ public class HttpConnector {
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
} }
private String attemptDownlaodAsString( private String attemptDownloadAsString(
final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList) final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
throws CollectorException { throws CollectorException {
log.info("requesting URL [{}]", requestUrl);
try { try {
final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
try { try {
return IOUtils.toString(s); return IOUtils.toString(s);
} catch (final IOException e) { } catch (final IOException e) {
log.error("error while retrieving from http-connection occured: " + requestUrl, e); log.error("error while retrieving from http-connection occurred: {}", requestUrl, e);
Thread.sleep(defaultDelay * 1000); Thread.sleep(defaultDelay * 1000);
errorList.add(e.getMessage()); errorList.add(e.getMessage());
return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList); return attemptDownloadAsString(requestUrl, retryNumber + 1, errorList);
} finally { } finally {
IOUtils.closeQuietly(s); IOUtils.closeQuietly(s);
} }
@ -87,7 +102,7 @@ public class HttpConnector {
throw new CollectorException("Max number of retries exceeded. Cause: \n " + errorList); throw new CollectorException("Max number of retries exceeded. Cause: \n " + errorList);
} }
log.debug("Downloading " + requestUrl + " - try: " + retryNumber); log.debug("requesting URL [{}], try {}", requestUrl, retryNumber);
try { try {
InputStream input = null; InputStream input = null;
@ -103,7 +118,7 @@ public class HttpConnector {
final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
log.warn("waiting and repeating request after " + retryAfter + " sec."); log.warn("waiting and repeating request after {} sec.", retryAfter);
Thread.sleep(retryAfter * 1000); Thread.sleep(retryAfter * 1000);
errorList.add("503 Service Unavailable"); errorList.add("503 Service Unavailable");
urlConn.disconnect(); urlConn.disconnect();
@ -111,7 +126,7 @@ public class HttpConnector {
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM } else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
|| urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) { || urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.debug("The requested url has been moved to " + newUrl); log.debug("The requested url has been moved to {}", newUrl);
errorList errorList
.add( .add(
String String
@ -121,15 +136,11 @@ public class HttpConnector {
urlConn.disconnect(); urlConn.disconnect();
return attemptDownload(newUrl, retryNumber + 1, errorList); return attemptDownload(newUrl, retryNumber + 1, errorList);
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) { } else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
log final String msg = String
.error( .format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage());
String log.error(msg);
.format(
"HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
Thread.sleep(defaultDelay * 1000); Thread.sleep(defaultDelay * 1000);
errorList errorList.add(msg);
.add(
String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
urlConn.disconnect(); urlConn.disconnect();
return attemptDownload(requestUrl, retryNumber + 1, errorList); return attemptDownload(requestUrl, retryNumber + 1, errorList);
} else { } else {
@ -138,7 +149,7 @@ public class HttpConnector {
return input; return input;
} }
} catch (final IOException e) { } catch (final IOException e) {
log.error("error while retrieving from http-connection occured: " + requestUrl, e); log.error("error while retrieving from http-connection occurred: {}", requestUrl, e);
Thread.sleep(defaultDelay * 1000); Thread.sleep(defaultDelay * 1000);
errorList.add(e.getMessage()); errorList.add(e.getMessage());
return attemptDownload(requestUrl, retryNumber + 1, errorList); return attemptDownload(requestUrl, retryNumber + 1, errorList);
@ -149,12 +160,12 @@ public class HttpConnector {
} }
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: " + urlConn.getResponseMessage()); log.debug("StatusCode: {}", urlConn.getResponseMessage());
for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) { for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) { if (e.getKey() != null) {
for (final String v : e.getValue()) { for (final String v : e.getValue()) {
log.debug(" key: " + e.getKey() + " - value: " + v); log.debug(" key: {} value: {}", e.getKey(), v);
} }
} }
} }
@ -183,37 +194,6 @@ public class HttpConnector {
"The requested url has been MOVED, but 'location' param is MISSING"); "The requested url has been MOVED, but 'location' param is MISSING");
} }
/**
* register for https scheme; this is a workaround and not intended for the use in trusted environments
*/
public void initTrustManager() {
final X509TrustManager tm = new X509TrustManager() {
@Override
public void checkClientTrusted(final X509Certificate[] xcs, final String string) {
}
@Override
public void checkServerTrusted(final X509Certificate[] xcs, final String string) {
}
@Override
public X509Certificate[] getAcceptedIssuers() {
return null;
}
};
try {
final SSLContext ctx = SSLContext.getInstance("TLS");
ctx.init(null, new TrustManager[] {
tm
}, null);
HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
} catch (final GeneralSecurityException e) {
log.fatal(e);
throw new IllegalStateException(e);
}
}
public int getMaxNumberOfRetry() { public int getMaxNumberOfRetry() {
return maxNumberOfRetry; return maxNumberOfRetry;
} }

View File

@ -40,7 +40,7 @@ public class DnetCollectorWorkerApplicationTests {
public void testFeeding(@TempDir Path testDir) throws Exception { public void testFeeding(@TempDir Path testDir) throws Exception {
System.out.println(testDir.toString()); System.out.println(testDir.toString());
CollectorWorker worker = new CollectorWorker(new CollectorPluginFactory(), getApi(), CollectorWorker worker = new CollectorWorker(getApi(),
"file://" + testDir.toString() + "/file.seq", testDir.toString() + "/file.seq"); "file://" + testDir.toString() + "/file.seq", testDir.toString() + "/file.seq");
worker.collect(); worker.collect();