diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java index 2392dee6a..df0722905 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java @@ -149,14 +149,14 @@ public class OaiIterator implements Iterator { try { doc = reader.read(new StringReader(xml)); } catch (final DocumentException e) { - log.warn("Error parsing xml, I try to clean it: " + xml, e); + log.warn("Error parsing xml, I try to clean it. {}", e.getMessage()); final String cleaned = XmlCleaner.cleanAllEntities(xml); try { doc = reader.read(new StringReader(cleaned)); } catch (final DocumentException e1) { final String resumptionToken = extractResumptionToken(xml); if (resumptionToken == null) { - throw new CollectorException("Error parsing cleaned document:" + cleaned, e1); + throw new CollectorException("Error parsing cleaned document:\n" + cleaned, e1); } return resumptionToken; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java index 1d99689db..d89bcee54 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/CollectorWorkerApplication.java @@ -19,16 +19,19 @@ import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collector.worker.model.ApiDescriptor; /** - * DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into HDFS. 
This module - * will be executed on the hadoop cluster and taking in input some parameters that tells it which is the right collector - * plugin to use and where store the data into HDFS path + * CollectorWorkerApplication is the main class responsible for starting the metadata collection process and storing its outcomes + * into HDFS. This application is executed on the hadoop cluster where, invoked in the context of the metadata collection + * oozie workflow, it receives all the input parameters necessary to instantiate the specific collection plugin and the + * related configuration * - * @author Sandro La Bruzzo + * @author Sandro La Bruzzo, Claudio Atzori */ public class CollectorWorkerApplication { private static final Logger log = LoggerFactory.getLogger(CollectorWorkerApplication.class); + public static final String COLLECTOR_WORKER_ERRORS = "collectorWorker-errors"; + /** * @param args */ @@ -60,7 +63,7 @@ public class CollectorWorkerApplication { final CollectorWorker worker = new CollectorWorker(api, hdfsuri, hdfsPath); CollectorPluginErrorLogList errors = worker.collect(); - populateOOZIEEnv("collectorErrors", errors.toString()); + populateOOZIEEnv(COLLECTOR_WORKER_ERRORS, errors.toString()); } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml index 2b2cf9dce..595613a2e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml @@ -87,6 +87,7 @@ --apidescriptor${apiDescription} --namenode${nameNode} --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + @@ -133,7 +134,6 @@ --actionREAD_UNLOCK --mdStoreManagerURI${mdStoreManagerURI} --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} - @@ 
-165,7 +165,6 @@ --actionREAD_UNLOCK --mdStoreManagerURI${mdStoreManagerURI} --readMDStoreId${wf:actionData('BeginRead')['mdStoreReadLockVersion']} -