diff --git a/dhp-applications/dhp-collector-worker/pom.xml b/dhp-applications/dhp-collector-worker/pom.xml
deleted file mode 100644
index 07008b56f0..0000000000
--- a/dhp-applications/dhp-collector-worker/pom.xml
+++ /dev/null
@@ -1,115 +0,0 @@
-
-
-
-
- eu.dnetlib.dhp
- dhp-applications
- 1.0.0-SNAPSHOT
- ../
-
-
-
- 4.0.0
-
- eu.dnetlib
- dhp-collector-worker
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
- 3.6.0
-
-
- 1.8
- ${project.build.sourceEncoding}
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
- 3.0.2
-
-
-
- org.apache.maven.plugins
- maven-source-plugin
- 3.0.1
-
-
- attach-sources
- verify
-
- jar-no-fork
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-surefire-plugin
- 2.19.1
-
- true
-
-
-
- org.apache.maven.plugins
- maven-javadoc-plugin
- 2.10.4
-
- true
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
- 3.0.0
-
-
-
-
-
-
-
-
-
- commons-cli
- commons-cli
-
-
- org.apache.hadoop
- hadoop-client
-
-
- com.fasterxml.jackson.core
- jackson-core
-
-
- com.fasterxml.jackson.core
- jackson-annotations
-
-
- com.fasterxml.jackson.core
- jackson-databind
-
-
- jaxen
- jaxen
-
-
- dom4j
- dom4j
-
-
-
-
-
\ No newline at end of file
diff --git a/dhp-applications/dhp-collector-worker/src/main/resources/application.properties b/dhp-applications/dhp-collector-worker/src/main/resources/application.properties
deleted file mode 100644
index 2a867fa5f8..0000000000
--- a/dhp-applications/dhp-collector-worker/src/main/resources/application.properties
+++ /dev/null
@@ -1,2 +0,0 @@
-spring.main.banner-mode=off
-logging.level.root=OFF
\ No newline at end of file
diff --git a/dhp-applications/dhp-collector-worker/src/test/resources/log4j.properties b/dhp-applications/dhp-collector-worker/src/test/resources/log4j.properties
deleted file mode 100644
index fd3cc24e16..0000000000
--- a/dhp-applications/dhp-collector-worker/src/test/resources/log4j.properties
+++ /dev/null
@@ -1,14 +0,0 @@
-### Root Level ###
-log4j.rootLogger=WARN, CONSOLE
-
-### Configuration for the CONSOLE appender ###
-log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
-log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
-log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n
-
-org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger
-
-### Application Level ###
-log4j.logger.eu.dnetlib=INFO
-log4j.logger.eu.dnetlib.collector.worker.DnetCollectorWorker=DEBUG
-
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
index 42859061c5..acbb7ffbbb 100644
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/model/mdstore/MetadataRecord.java
@@ -1,7 +1,6 @@
package eu.dnetlib.dhp.model.mdstore;
import eu.dnetlib.dhp.utils.DHPUtils;
-import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
@@ -44,6 +43,11 @@ public class MetadataRecord implements Serializable {
*/
private long dateOfCollection;
+ /**
+ * the date when the record has been transformed
+ */
+ private long dateOfTransformation;
+
public MetadataRecord() {
this.dateOfCollection = System.currentTimeMillis();
@@ -109,6 +113,14 @@ public class MetadataRecord implements Serializable {
this.dateOfCollection = dateOfCollection;
}
+ public long getDateOfTransformation() {
+ return dateOfTransformation;
+ }
+
+ public void setDateOfTransformation(long dateOfTransformation) {
+ this.dateOfTransformation = dateOfTransformation;
+ }
+
@Override
public boolean equals(Object o) {
if (!(o instanceof MetadataRecord)) {
diff --git a/dhp-applications/dhp-collector-worker/README.md b/dhp-workflows/dhp-aggregation/README.md
similarity index 100%
rename from dhp-applications/dhp-collector-worker/README.md
rename to dhp-workflows/dhp-aggregation/README.md
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
index 958b271a65..61e8cc34fa 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/GenerateNativeStoreSparkJob.java
@@ -68,12 +68,14 @@ public class GenerateNativeStoreSparkJob {
final SparkSession spark = SparkSession
.builder()
.appName("GenerateNativeStoreSparkJob")
- .master("yarn")
+ .master(parser.get("master"))
.getOrCreate();
final Map ongoingMap = new HashMap<>();
final Map reportMap = new HashMap<>();
+ final boolean test = parser.get("isTest") == null?false: Boolean.valueOf(parser.get("isTest"));
+
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final JavaPairRDD inputRDD = sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
@@ -88,19 +90,28 @@ public class GenerateNativeStoreSparkJob {
.filter(Objects::nonNull).distinct();
ongoingMap.put("ongoing", "0");
- manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false);
+ if (!test) {
+ manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false);
+ }
+
final Encoder encoder = Encoders.bean(MetadataRecord.class);
final Dataset mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
mdStoreRecords.add(mdstore.count());
ongoingMap.put("ongoing", ""+ totalItems.value());
- manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false);
+ if (!test) {
+ manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false);
+ }
mdstore.write().format("parquet").save(parser.get("output"));
reportMap.put("inputItem" , ""+ totalItems.value());
reportMap.put("invalidRecords", "" + invalidRecords.value());
reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
- manager.sendMessage(new Message(parser.get("workflowId"),"Collection", MessageType.REPORT, reportMap ), parser.get("rabbitReportQueue"), true, false);
+ if (!test) {
+ manager.sendMessage(new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), parser.get("rabbitReportQueue"), true, false);
+ manager.close();
+ }
+
}
}
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
similarity index 65%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/CollectorPlugin.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
index 5ec1e9a6ec..cfa0e417b4 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/CollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
@@ -1,10 +1,10 @@
-package eu.dnetlib.collector.worker.plugins;
+package eu.dnetlib.dhp.collection.plugin;
+
+import eu.dnetlib.collector.worker.model.ApiDescriptor;
+import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.stream.Stream;
-import eu.dnetlib.collector.worker.DnetCollectorException;
-import eu.dnetlib.collector.worker.model.ApiDescriptor;
-
public interface CollectorPlugin {
Stream collect(ApiDescriptor api) throws DnetCollectorException;
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
similarity index 93%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiCollectorPlugin.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
index 1dea7f6272..ad893ce528 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.collector.worker.plugins.oai;
+package eu.dnetlib.dhp.collection.plugin.oai;
import java.util.ArrayList;
import java.util.Iterator;
@@ -11,9 +11,10 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
-import eu.dnetlib.collector.worker.DnetCollectorException;
+
import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
public class OaiCollectorPlugin implements CollectorPlugin {
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
similarity index 95%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiIterator.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
index 191b7b5969..cd093ed2d1 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.collector.worker.plugins.oai;
+package eu.dnetlib.dhp.collection.plugin.oai;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
@@ -7,6 +7,9 @@ import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
+import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
+import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
+import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -15,9 +18,6 @@ import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
-import eu.dnetlib.collector.worker.DnetCollectorException;
-import eu.dnetlib.collector.worker.utils.HttpConnector;
-import eu.dnetlib.collector.worker.utils.XmlCleaner;
public class OaiIterator implements Iterator {
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
similarity index 82%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiIteratorFactory.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
index ef7ce222f8..fc8dba5687 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/plugins/oai/OaiIteratorFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
@@ -1,7 +1,8 @@
-package eu.dnetlib.collector.worker.plugins.oai;
+package eu.dnetlib.dhp.collection.plugin.oai;
+
+import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import java.util.Iterator;
-import eu.dnetlib.collector.worker.utils.HttpConnector;
public class OaiIteratorFactory {
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorException.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
similarity index 94%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorException.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
index bc4287a0dc..75ccc4d91c 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorException.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorException.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.collector.worker;
+package eu.dnetlib.dhp.collection.worker;
public class DnetCollectorException extends Exception {
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
similarity index 96%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorker.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
index 3d458d8fcc..c76536b3a9 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorker.java
@@ -1,10 +1,11 @@
-package eu.dnetlib.collector.worker;
+package eu.dnetlib.dhp.collection.worker;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
-import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;
+
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
similarity index 93%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
index fdd7ceb543..d4bd22817c 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/DnetCollectorWorkerApplication.java
@@ -1,7 +1,8 @@
-package eu.dnetlib.collector.worker;
+package eu.dnetlib.dhp.collection.worker;
+
-import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.MessageManager;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/CollectorPluginErrorLogList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
similarity index 89%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/CollectorPluginErrorLogList.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
index 062f6c7a8c..807479c5da 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/CollectorPluginErrorLogList.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginErrorLogList.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.collector.worker.utils;
+package eu.dnetlib.dhp.collection.worker.utils;
import java.util.LinkedList;
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/CollectorPluginFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
similarity index 66%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/CollectorPluginFactory.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
index 4c55617781..cc2eaaddc8 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/CollectorPluginFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/CollectorPluginFactory.java
@@ -1,8 +1,10 @@
-package eu.dnetlib.collector.worker.utils;
+package eu.dnetlib.dhp.collection.worker.utils;
-import eu.dnetlib.collector.worker.DnetCollectorException;
-import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
-import eu.dnetlib.collector.worker.plugins.oai.OaiCollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
+import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
+
+;
public class CollectorPluginFactory {
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/HttpConnector.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
similarity index 96%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/HttpConnector.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
index 73a416e18e..24e9f1ac15 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/HttpConnector.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/HttpConnector.java
@@ -1,29 +1,22 @@
-package eu.dnetlib.collector.worker.utils;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.CookieHandler;
-import java.net.CookieManager;
-import java.net.CookiePolicy;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.security.GeneralSecurityException;
-import java.security.cert.X509Certificate;
-import java.util.List;
-import java.util.Map;
-
-import javax.net.ssl.HttpsURLConnection;
-import javax.net.ssl.SSLContext;
-import javax.net.ssl.TrustManager;
-import javax.net.ssl.X509TrustManager;
+package eu.dnetlib.dhp.collection.worker.utils;
+import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
-import eu.dnetlib.collector.worker.DnetCollectorException;
+import javax.net.ssl.HttpsURLConnection;
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.*;
+import java.security.GeneralSecurityException;
+import java.security.cert.X509Certificate;
+import java.util.List;
+import java.util.Map;
public class HttpConnector {
diff --git a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/XmlCleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
similarity index 99%
rename from dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/XmlCleaner.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
index 7d1121a6d1..8c8ee629f3 100644
--- a/dhp-applications/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/utils/XmlCleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/worker/utils/XmlCleaner.java
@@ -1,4 +1,4 @@
-package eu.dnetlib.collector.worker.utils;
+package eu.dnetlib.dhp.collection.worker.utils;
import java.util.HashMap;
import java.util.HashSet;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
index c186058a3c..5cd78491b8 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformFunction.java
@@ -2,6 +2,7 @@ package eu.dnetlib.dhp.transformation;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.functions.Cleaner;
+import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
import net.sf.saxon.s9api.*;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.util.LongAccumulator;
@@ -9,6 +10,7 @@ import org.apache.spark.util.LongAccumulator;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
+import java.util.Map;
public class TransformFunction implements MapFunction {
@@ -16,29 +18,30 @@ public class TransformFunction implements MapFunction vocabularies) throws Exception {
this.totalItems= totalItems;
this.errorItems = errorItems;
this.transformedItems = transformedItems;
- this.trasformationRule = trasformationRule;
+ this.transformationRule = transformationRule;
this.dateOfTransformation = dateOfTransformation;
+ cleanFunction = new Cleaner(vocabularies);
}
@Override
public MetadataRecord call(MetadataRecord value) {
totalItems.add(1);
try {
- final Cleaner cleanFunction = new Cleaner();
Processor processor = new Processor(false);
processor.registerExtensionFunction(cleanFunction);
final XsltCompiler comp = processor.newXsltCompiler();
- XsltExecutable xslt = comp.compile(new StreamSource(new ByteArrayInputStream(trasformationRule.getBytes())));
+ XsltExecutable xslt = comp.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
XdmNode source = processor.newDocumentBuilder().build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
XsltTransformer trans = xslt.load();
trans.setInitialContextNode(source);
@@ -50,7 +53,7 @@ public class TransformFunction implements MapFunction encoder = Encoders.bean(MetadataRecord.class);
final Dataset mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder);
final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
final LongAccumulator transformedItems = spark.sparkContext().longAccumulator("transformedItems");
- final TransformFunction transformFunction = new TransformFunction(totalItems, errorItems, transformedItems, trasformationRule, dateOfCollection) ;
+ final Map vocabularies = new HashMap<>();
+ vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
+ final TransformFunction transformFunction = new TransformFunction(totalItems, errorItems, transformedItems, trasformationRule, dateOfCollection, vocabularies) ;
mdstoreInput.map(transformFunction, encoder).write().format("parquet").save(outputPath);
if (rabbitHost != null) {
System.out.println("SEND FINAL REPORT");
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/functions/Cleaner.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/functions/Cleaner.java
index a0d4be94cf..47f33a342a 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/functions/Cleaner.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/functions/Cleaner.java
@@ -1,8 +1,22 @@
package eu.dnetlib.dhp.transformation.functions;
+import eu.dnetlib.dhp.transformation.vocabulary.Term;
+import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
import net.sf.saxon.s9api.*;
+import scala.Serializable;
-public class Cleaner implements ExtensionFunction {
+import java.util.Map;
+import java.util.Optional;
+
+public class Cleaner implements ExtensionFunction, Serializable {
+
+
+ private final Map vocabularies;
+
+
+ public Cleaner(Map vocabularies) {
+ this.vocabularies = vocabularies;
+ }
@Override
public QName getName() {
@@ -11,20 +25,25 @@ public class Cleaner implements ExtensionFunction {
@Override
public SequenceType getResultType() {
- return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE);
+ return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE_OR_MORE);
}
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[]
{
+ SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE),
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
+
};
}
@Override
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
- return new XdmAtomicValue("cleaned"+currentValue);
+ final String vocabularyName =xdmValues[1].itemAt(0).getStringValue();
+ Optional cleanedValue = vocabularies.get(vocabularyName).getTerms().stream().filter(it -> it.getNativeName().equalsIgnoreCase(currentValue)).findAny();
+
+ return new XdmAtomicValue(cleanedValue.isPresent()?cleanedValue.get().getCode():currentValue);
}
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/Term.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/Term.java
new file mode 100644
index 0000000000..f93c2a1207
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/Term.java
@@ -0,0 +1,53 @@
+package eu.dnetlib.dhp.transformation.vocabulary;
+
+import java.io.Serializable;
+
+public class Term implements Serializable {
+
+ private String englishName;
+ private String nativeName;
+ private String encoding;
+ private String code;
+ private String synonyms;
+
+
+ public String getEnglishName() {
+ return englishName;
+ }
+
+ public void setEnglishName(String englishName) {
+ this.englishName = englishName;
+ }
+
+ public String getNativeName() {
+ return nativeName;
+ }
+
+ public void setNativeName(String nativeName) {
+ this.nativeName = nativeName;
+ }
+
+ public String getEncoding() {
+ return encoding;
+ }
+
+ public void setEncoding(String encoding) {
+ this.encoding = encoding;
+ }
+
+ public String getCode() {
+ return code;
+ }
+
+ public void setCode(String code) {
+ this.code = code;
+ }
+
+ public String getSynonyms() {
+ return synonyms;
+ }
+
+ public void setSynonyms(String synonyms) {
+ this.synonyms = synonyms;
+ }
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/Vocabulary.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/Vocabulary.java
new file mode 100644
index 0000000000..58e9cb95c8
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/Vocabulary.java
@@ -0,0 +1,57 @@
+package eu.dnetlib.dhp.transformation.vocabulary;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+
+public class Vocabulary implements Serializable {
+
+ private String id;
+ private String name;
+ private String description;
+ private String code;
+ private List terms;
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public void setDescription(String description) {
+ this.description = description;
+ }
+
+ public String getCode() {
+ return code;
+ }
+
+ public void setCode(String code) {
+ this.code = code;
+ }
+
+ public List getTerms() {
+ return terms;
+ }
+
+ public void setTerms(List terms) {
+ this.terms = terms;
+ }
+
+
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/VocabularyHelper.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/VocabularyHelper.java
new file mode 100644
index 0000000000..b6ecf795c8
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/vocabulary/VocabularyHelper.java
@@ -0,0 +1,23 @@
+package eu.dnetlib.dhp.transformation.vocabulary;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.commons.io.IOUtils;
+
+import java.io.Serializable;
+import java.net.URL;
+import java.nio.charset.Charset;
+
+public class VocabularyHelper implements Serializable {
+
+ private final static String OPENAIRE_URL ="http://api.openaire.eu/vocabularies/%s.json";
+
+ public static Vocabulary getVocabularyFromAPI(final String vocabularyName) throws Exception {
+ final URL url = new URL(String.format(OPENAIRE_URL, vocabularyName));
+
+ final String response = IOUtils.toString(url, Charset.defaultCharset());
+ final ObjectMapper jsonMapper = new ObjectMapper();
+ final Vocabulary vocabulary = jsonMapper.readValue(response, Vocabulary.class);
+ return vocabulary;
+ }
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json
index ed8d04315b..4b4925f276 100644
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/collection_input_parameters.json
@@ -1,4 +1,5 @@
[
+ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true},
{"paramName":"e", "paramLongName":"encoding", "paramDescription": "the encoding of the input record should be JSON or XML", "paramRequired": true},
{"paramName":"d", "paramLongName":"dateOfCollection", "paramDescription": "the date when the record has been stored", "paramRequired": true},
{"paramName":"p", "paramLongName":"provenance", "paramDescription": "the infos about the provenance of the collected records", "paramRequired": true},
@@ -10,5 +11,6 @@
{"paramName":"rh", "paramLongName":"rabbitHost", "paramDescription": "the host of the RabbitMq server", "paramRequired": true},
{"paramName":"ro", "paramLongName":"rabbitOngoingQueue", "paramDescription": "the name of the ongoing queue", "paramRequired": true},
{"paramName":"rr", "paramLongName":"rabbitReportQueue", "paramDescription": "the name of the report queue", "paramRequired": true},
- {"paramName":"w", "paramLongName":"workflowId", "paramDescription": "the identifier of the dnet Workflow", "paramRequired": true}
+ {"paramName":"w", "paramLongName":"workflowId", "paramDescription": "the identifier of the dnet Workflow", "paramRequired": true},
+ {"paramName":"t", "paramLongName":"isTest", "paramDescription": "flag to run the job in test mode, skipping RabbitMQ messaging", "paramRequired": false}
]
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml
index 1602519e06..3e7f684012 100644
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collection/oozie_app/workflow.xml
@@ -55,22 +55,21 @@
-
+
${jobTracker}
${nameNode}
- lib/dhp-collector-worker-1.0.0.jar
- -p${sequenceFilePath}
- -a${apiDescription}
- -n${nameNode}
- -rh${rmq_host}
- -ru${rmq_user}
- -rp${rmq_pwd}
- -rr${rmq_report}
- -ro${rmq_ongoing}
- -usandro.labruzzo
- -w${workflowId}
-
-
+ eu.dnetlib.dhp.collection.worker.DnetCollectorWorker
+ -p${sequenceFilePath}
+ -a${apiDescription}
+ -n${nameNode}
+ -rh${rmq_host}
+ -ru${rmq_user}
+ -rp${rmq_pwd}
+ -rr${rmq_report}
+ -ro${rmq_ongoing}
+ -usandro.labruzzo
+ -w${workflowId}
+
diff --git a/dhp-applications/dhp-collector-worker/src/main/resources/eu/dnetlib/collector/worker/collector_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collector/worker/collector_parameter.json
similarity index 100%
rename from dhp-applications/dhp-collector-worker/src/main/resources/eu/dnetlib/collector/worker/collector_parameter.json
rename to dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/collector/worker/collector_parameter.json
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java
index 8b49b9df70..c6e50343bd 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/CollectionJobTest.java
@@ -3,21 +3,48 @@ package eu.dnetlib.dhp.collection;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.model.mdstore.Provenance;
+import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
-import org.junit.Assert;
-import org.junit.Ignore;
-import org.junit.Test;
+import org.junit.*;
import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
public class CollectionJobTest {
+ private Path testDir;
+ @Before
+ public void setup() throws IOException {
+ testDir = Files.createTempDirectory("dhp-collection");
+
+
+ }
+
+
+ @After
+ public void teadDown() throws IOException {
+ FileUtils.deleteDirectory(testDir.toFile());
+ }
@Test
- @Ignore
- public void test () throws Exception {
+ public void tesCollection () throws Exception {
Provenance provenance = new Provenance("pippo", "puppa", "ns_prefix");
- GenerateNativeStoreSparkJob.main(new String[] {"-e", "XML","-d", ""+System.currentTimeMillis(),"-p", new ObjectMapper().writeValueAsString(provenance), "-x","./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']","-i","/home/sandro/Downloads/oai_1","-o","/home/sandro/Downloads/mdstore_result"});
+ GenerateNativeStoreSparkJob.main(new String[] {
+ "-mt", "local",
+ "-w", "wid",
+ "-e", "XML",
+ "-d", ""+System.currentTimeMillis(),
+ "-p", new ObjectMapper().writeValueAsString(provenance),
+ "-x", "./*[local-name()='record']/*[local-name()='header']/*[local-name()='identifier']",
+ "-i", this.getClass().getResource("/eu/dnetlib/dhp/collection/native.seq").toString(),
+ "-o", testDir.toString()+"/store",
+ "-t", "true",
+ "-ru", "",
+ "-rp", "",
+ "-rh", "",
+ "-ro", "",
+ "-rr", ""});
System.out.println(new ObjectMapper().writeValueAsString(provenance));
}
diff --git a/dhp-applications/dhp-collector-worker/src/test/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplicationTests.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java
similarity index 93%
rename from dhp-applications/dhp-collector-worker/src/test/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplicationTests.java
rename to dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java
index 370c5166d4..6a9417097f 100644
--- a/dhp-applications/dhp-collector-worker/src/test/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplicationTests.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collector/worker/DnetCollectorWorkerApplicationTests.java
@@ -1,9 +1,10 @@
-package eu.dnetlib.collector.worker;
+package eu.dnetlib.dhp.collector.worker;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.collector.worker.utils.CollectorPluginFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.collection.worker.DnetCollectorWorker;
+import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import org.junit.After;
@@ -48,7 +49,7 @@ public class DnetCollectorWorkerApplicationTests {
@After
public void dropDown(){
- File f = new File("/tmp/test.seq");
+ File f = new File("/tmp/file.seq");
f.delete();
}
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
index 0dad1743bd..01f71a19f9 100644
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/TransformationJobTest.java
@@ -2,26 +2,30 @@ package eu.dnetlib.dhp.transformation;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.functions.Cleaner;
+import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
+import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
import eu.dnetlib.dhp.utils.DHPUtils;
import net.sf.saxon.s9api.*;
+import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
+import org.junit.*;
import org.mockito.Mock;
import org.mockito.junit.MockitoJUnit;
import org.mockito.junit.MockitoRule;
import javax.xml.transform.stream.StreamSource;
import java.io.File;
+import java.io.IOException;
import java.io.StringWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
public class TransformationJobTest {
@@ -31,9 +35,26 @@ public class TransformationJobTest {
@Rule
public MockitoRule mockitoRule = MockitoJUnit.rule();
+ private Path testDir;
+
+ @Before
+ public void setup() throws IOException {
+ testDir = Files.createTempDirectory("dhp-collection");
+ }
+
+
+ @After
+ public void teadDown() throws IOException {
+ FileUtils.deleteDirectory(testDir.toFile());
+ }
+
+
@Test
public void testTransformSaxonHE() throws Exception {
- Cleaner cleanFunction = new Cleaner();
+
+ Map vocabularies = new HashMap<>();
+ vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
+ Cleaner cleanFunction = new Cleaner(vocabularies);
Processor proc = new Processor(false);
proc.registerExtensionFunction(cleanFunction);
final XsltCompiler comp = proc.newXsltCompiler();
@@ -53,26 +74,30 @@ public class TransformationJobTest {
@Test
public void transformTest() throws Exception {
- final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstore").getFile();
- Path tempDirWithPrefix = Files.createTempDirectory("mdstore_output");
-
- final String mdstore_output = tempDirWithPrefix.toFile().getAbsolutePath()+"/version";
-
+ final String mdstore_input = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
+ final String mdstore_output = testDir.toString()+"/version";
final String xslt = DHPUtils.compressString(IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/tr.xml")));
+ TransformSparkJobNode.main(new String[]{
+ "-mt", "local",
+ "-i", mdstore_input,
+ "-o", mdstore_output,
+ "-d", "1",
+ "-w", "1",
+ "-tr", xslt,
+ "-t", "true",
+ "-ru", "",
+ "-rp", "",
+ "-rh", "",
+ "-ro", "",
+ "-rr", ""});
- System.out.println(xslt);
- TransformSparkJobNode.main(new String[]{"-mt","local", "-i", mdstore_input, "-o", mdstore_output,"-d","1", "-w","1","-tr", xslt, "-t", "true", "-ru","", "-rp","", "-rh","", "-ro","", "-rr",""});
- Files.walk(tempDirWithPrefix)
- .sorted(Comparator.reverseOrder())
- .map(Path::toFile)
- .forEach(File::delete);
}
@Test
public void tryLoadFolderOnCP() throws Exception {
- final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstore").getFile();
+ final String path = this.getClass().getResource("/eu/dnetlib/dhp/transform/mdstorenative").getFile();
System.out.println("path = " + path);
Path tempDirWithPrefix = Files.createTempDirectory("mdsotre_output");
@@ -90,8 +115,10 @@ public class TransformationJobTest {
Document document = reader.read(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/tr.xml"));
Node node = document.selectSingleNode("//CODE/*[local-name()='stylesheet']");
final String xslt = node.asXML();
+ Map vocabularies = new HashMap<>();
+ vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
- TransformFunction tf = new TransformFunction(accumulator, accumulator, accumulator, xslt, 1);
+ TransformFunction tf = new TransformFunction(accumulator, accumulator, accumulator, xslt, 1, vocabularies);
MetadataRecord record = new MetadataRecord();
record.setBody(IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dhp/transform/input.xml")));
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/vocabulary/VocabularyTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/vocabulary/VocabularyTest.java
new file mode 100644
index 0000000000..d96a7ac4c8
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/transformation/vocabulary/VocabularyTest.java
@@ -0,0 +1,17 @@
+package eu.dnetlib.dhp.transformation.vocabulary;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+public class VocabularyTest {
+
+
+
+ @Test
+ public void testLoadVocabulary() throws Exception {
+
+ final Vocabulary vocabulary = VocabularyHelper.getVocabularyFromAPI("dnet:languages");
+ assertEquals("dnet:languages",vocabulary.getName());
+
+
+ }
+}
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/native.seq b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/native.seq
new file mode 100644
index 0000000000..e102e96f72
Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/native.seq differ
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl
index 90818e5264..cef50aa952 100644
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/ext_simple.xsl
@@ -9,7 +9,7 @@
-
+
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml
index 8a22979476..8760d3117d 100644
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/input.xml
@@ -20,8 +20,8 @@
Lombardi, Floriana
Tafuri, F.
Tagliacozzo, A.
- Materials Chemistry
- Geochemistry
+ Acoli
+ Abkhazian
Condensed Matter Physics
Superconducting hybrid junctions are revealing a variety of effects. Some of them are due to the special layout of these devices, which often use a coplanar configuration with relatively large barrier channels and the possibility of hosting Pearl vortices. A Josephson junction with a quasi-ideal two-dimensional barrier has been realized by growing graphene on SiC with Al electrodes. Chemical vapor deposition offers centimeter size monolayer areas where it is possible to realize a comparative analysis of different devices with nominally the same barrier. In samples with a graphene gap below 400 nm, we have found evidence of Josephson coherence in the presence of an incipient Berezinskii-Kosterlitz-Thouless transition. When the magnetic field is cycled, a remarkable hysteretic collapse and revival of the Josephson supercurrent occurs. Similar hysteresis are found in granular systems and are usually justified within the Bean critical state model (CSM). We show that the CSM, with appropriate account for the low-dimensional geometry, can partly explain the odd features measured in these junctions.
info:eu-repo/grantAgreement/EC/FP7/604391//Graphene-Based Revolutions in ICT And Beyond (Graphene Flagship)/
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/part-00000-ccf4ed18-46a4-45d2-ab85-2878a9521dde-c000.snappy.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/part-00000-ccf4ed18-46a4-45d2-ab85-2878a9521dde-c000.snappy.parquet
deleted file mode 100644
index ef3537dcdb..0000000000
Binary files a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/part-00000-ccf4ed18-46a4-45d2-ab85-2878a9521dde-c000.snappy.parquet and /dev/null differ
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/part-00001-ccf4ed18-46a4-45d2-ab85-2878a9521dde-c000.snappy.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/part-00001-ccf4ed18-46a4-45d2-ab85-2878a9521dde-c000.snappy.parquet
deleted file mode 100644
index 986ee6f67f..0000000000
Binary files a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/part-00001-ccf4ed18-46a4-45d2-ab85-2878a9521dde-c000.snappy.parquet and /dev/null differ
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/_SUCCESS b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstorenative/_SUCCESS
similarity index 100%
rename from dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstore/_SUCCESS
rename to dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstorenative/_SUCCESS
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstorenative/part-00000.snappy.parquet b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstorenative/part-00000.snappy.parquet
new file mode 100644
index 0000000000..5330104bec
Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/mdstorenative/part-00000.snappy.parquet differ
diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/tr.xml b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/tr.xml
index 219b03ca34..a9eae85762 100644
--- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/tr.xml
+++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/transform/tr.xml
@@ -24,7 +24,7 @@
-
+
diff --git a/pom.xml b/pom.xml
index f426b333eb..a91114b2ec 100644
--- a/pom.xml
+++ b/pom.xml
@@ -242,6 +242,18 @@
${google.protobuf.version}
+
+ com.googlecode.protobuf-java-format
+ protobuf-java-format
+ 1.2
+
+
+
+ eu.dnetlib
+ dnet-openaire-data-protos
+ 3.9.4
+
+