diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..1a13ebf052
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# dnet-hadoop
diff --git a/dhp-build/dhp-build-assembly-resources/README.markdown b/dhp-build/dhp-build-assembly-resources/README.markdown
new file mode 100644
index 0000000000..efee5fa457
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/README.markdown
@@ -0,0 +1,7 @@
+Module utilized by `dhp-wf`.
+
+Contains all the resources required by this parent module:
+
+* assembly XML definitions
+* build shell scripts
+* oozie package commands for uploading, running and monitoring oozie workflows
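+
+For illustration, a consuming workflow module would typically reference one of these shared
+descriptors through the `maven-assembly-plugin`. The wiring below is a minimal sketch (the
+exact execution binding used by `dhp-wf` is an assumption, not taken from this module):
+
+```xml
+<plugin>
+    <groupId>org.apache.maven.plugins</groupId>
+    <artifactId>maven-assembly-plugin</artifactId>
+    <dependencies>
+        <!-- puts assemblies/oozie-installer.xml on the plugin classpath -->
+        <dependency>
+            <groupId>eu.dnetlib.dhp</groupId>
+            <artifactId>dhp-build-assembly-resources</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
+    <configuration>
+        <descriptorRefs>
+            <descriptorRef>oozie-installer</descriptorRef>
+        </descriptorRefs>
+    </configuration>
+</plugin>
+```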
diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml
new file mode 100644
index 0000000000..2d2543505f
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/pom.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp-build</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>dhp-build-assembly-resources</artifactId>
+    <packaging>jar</packaging>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml
new file mode 100644
index 0000000000..1419c5b1c3
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
+    <id>oozie-installer</id>
+    <formats>
+        <format>dir</format>
+    </formats>
+
+    <fileSets>
+        <fileSet>
+            <filtered>true</filtered>
+            <directory>${project.build.directory}/assembly-resources/commands</directory>
+            <outputDirectory>/</outputDirectory>
+            <includes>
+                <include>**/*</include>
+            </includes>
+            <fileMode>0755</fileMode>
+            <lineEnding>unix</lineEnding>
+        </fileSet>
+        <fileSet>
+            <outputDirectory>/</outputDirectory>
+        </fileSet>
+    </fileSets>
+</assembly>
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml
new file mode 100644
index 0000000000..bf679e6529
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
+    <id>tests</id>
+    <formats>
+        <format>jar</format>
+    </formats>
+    <includeBaseDirectory>false</includeBaseDirectory>
+    <fileSets>
+        <fileSet>
+            <directory>${project.build.testOutputDirectory}</directory>
+            <outputDirectory>/</outputDirectory>
+        </fileSet>
+    </fileSets>
+</assembly>
\ No newline at end of file
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh
new file mode 100644
index 0000000000..e9d55f0d7e
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
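+# copies the workflow working directory from HDFS into the current local directory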
+hadoop fs -get ${workingDir}
+
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh
new file mode 100644
index 0000000000..c79839ea49
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+echo ""
+echo "---->Contents of the working directory"
+hadoop fs -ls ${workingDir}
+
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown
new file mode 100644
index 0000000000..3e049c18b7
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown
@@ -0,0 +1,5 @@
+Execute the scripts in the following order:
+
+1. `upload_workflow.sh`
+2. `run_workflow.sh`
+3. `print_working_dir.sh` or `get_working_dir.sh`
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh
new file mode 100644
index 0000000000..fee3d77370
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
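+# runs the oozie workflow: uses ./job.properties by default,
+# or $1/job.properties when a directory is given as the first argument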
+if [ $# = 0 ] ; then
+ oozie job -oozie ${oozieServiceLoc} -config job.properties -run
+else
+ oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run
+fi
+
+
+
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh
new file mode 100644
index 0000000000..c5d299c2f0
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+exec 3>&1
+BASH_XTRACEFD=3
+set -x ## print every executed command
+
+
+if [ $# = 0 ] ; then
+ target_dir_root=`pwd`'/${oozieAppDir}'
+else
+ target_dir_root=`readlink -f $1`'/${oozieAppDir}'
+fi
+
+# initial phase, creating symbolic links to jars in all subworkflows
+# currently disabled
+#libDir=$target_dir_root'/lib'
+#dirs=`find $target_dir_root/* -maxdepth 10 -type d`
+#for dir in $dirs
+#do
+# if [ -f $dir/workflow.xml ]
+# then
+# echo "creating symbolic links to jars in directory: $dir/lib"
+# if [ ! -d "$dir/lib" ]; then
+# mkdir $dir/lib
+# fi
+# find $libDir -type f -exec ln -s \{\} $dir/lib \;
+# fi
+#done
+
+
+#uploading
+hadoop fs -rm -r ${sandboxDir}
+hadoop fs -mkdir -p ${sandboxDir}
+hadoop fs -mkdir -p ${workingDir}
+hadoop fs -put $target_dir_root ${sandboxDir}
diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties
new file mode 100644
index 0000000000..021ecf55be
--- /dev/null
+++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties
@@ -0,0 +1,7 @@
+# sandboxName, when not provided explicitly, will be generated
+sandboxName=${sandboxName}
+sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName}
+workingDir=${sandboxDir}/working_dir
+oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir}
+oozieTopWfApplicationPath = ${oozie.wf.application.path}
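+# example resolved layout, assuming hypothetical values sandboxName=wf/transformers
+# and iis.hadoop.frontend.user.name=jdoe:
+#   sandboxDir=/user/jdoe/wf/transformers
+#   workingDir=/user/jdoe/wf/transformers/working_dir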
+
diff --git a/dhp-build/dhp-build-properties-maven-plugin/README.markdown b/dhp-build/dhp-build-properties-maven-plugin/README.markdown
new file mode 100644
index 0000000000..f99c7c1b03
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/README.markdown
@@ -0,0 +1,6 @@
+Maven plugin module utilized by `dhp-wf` for building a proper `job.properties` file.
+
+It is based on http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html and supplemented with:
+
+* handling of the `includePropertyKeysFromFiles` property, which allows writing only the properties listed in the given property files
+
+As a final outcome, only the properties listed in the `include` element and present as keys in the files from the `includePropertyKeysFromFiles` element will be written to the output file.
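+
+For illustration, the `write-project-properties` goal might be bound in a workflow module
+along these lines (a minimal sketch: the phase binding, output location and the referenced
+key-listing file are assumptions, not taken from this module):
+
+```xml
+<plugin>
+    <groupId>eu.dnetlib.dhp</groupId>
+    <artifactId>dhp-build-properties-maven-plugin</artifactId>
+    <version>1.0.0-SNAPSHOT</version>
+    <executions>
+        <execution>
+            <phase>process-resources</phase>
+            <goals>
+                <goal>write-project-properties</goal>
+            </goals>
+            <configuration>
+                <outputFile>${project.build.directory}/job.properties</outputFile>
+                <!-- hypothetical file whose keys select the properties to keep -->
+                <includePropertyKeysFromFiles>
+                    <param>${basedir}/src/main/resources/job.properties</param>
+                </includePropertyKeysFromFiles>
+            </configuration>
+        </execution>
+    </executions>
+</plugin>
+```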
diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
new file mode 100644
index 0000000000..38093f4d18
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp-build</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>dhp-build-properties-maven-plugin</artifactId>
+    <packaging>maven-plugin</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.maven</groupId>
+            <artifactId>maven-plugin-api</artifactId>
+            <version>2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.maven</groupId>
+            <artifactId>maven-project</artifactId>
+            <version>2.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.kuali.maven.plugins</groupId>
+            <artifactId>properties-maven-plugin</artifactId>
+            <version>1.3.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <directory>target</directory>
+        <outputDirectory>target/classes</outputDirectory>
+        <finalName>${project.artifactId}-${project.version}</finalName>
+        <testOutputDirectory>target/test-classes</testOutputDirectory>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-source-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>attach-sources</id>
+                        <phase>verify</phase>
+                        <goals>
+                            <goal>jar-no-fork</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+                <configuration>
+                    <detectLinks>true</detectLinks>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java
new file mode 100644
index 0000000000..a3a99cc0c6
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java
@@ -0,0 +1,71 @@
+package eu.dnetlib.maven.plugin.properties;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.plugin.MojoFailureException;
+
+/**
+ * Generates oozie properties which were not provided from the command line.
+ * @author mhorst
+ *
+ * @goal generate-properties
+ */
+public class GenerateOoziePropertiesMojo extends AbstractMojo {
+
+ public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir";
+ public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName";
+
+ private final String[] limiters = {"iis", "dnetlib", "eu", "dhp"};
+
+ @Override
+ public void execute() throws MojoExecutionException, MojoFailureException {
+ if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) &&
+ !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
+ String generatedSandboxName = generateSandboxName(System.getProperties().getProperty(
+ PROPERTY_NAME_WF_SOURCE_DIR));
+ if (generatedSandboxName!=null) {
+ System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME,
+ generatedSandboxName);
+ } else {
+ System.out.println("unable to generate sandbox name from path: " +
+ System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
+ }
+ }
+ }
+
+ /**
+ * Generates sandbox name from workflow source directory.
+ * @param wfSourceDir
+ * @return generated sandbox name
+ */
+ private String generateSandboxName(String wfSourceDir) {
+// utilize all dir names until finding one of the limiters
+        List<String> sandboxNameParts = new ArrayList<String>();
+ String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
+ ArrayUtils.reverse(tokens);
+ if (tokens.length>0) {
+ for (String token : tokens) {
+ for (String limiter : limiters) {
+ if (limiter.equals(token)) {
+ return sandboxNameParts.size()>0?
+ StringUtils.join(sandboxNameParts.toArray()):null;
+ }
+ }
+ if (sandboxNameParts.size()>0) {
+ sandboxNameParts.add(0, File.separator);
+ }
+ sandboxNameParts.add(0, token);
+ }
+ return StringUtils.join(sandboxNameParts.toArray());
+ } else {
+ return null;
+ }
+ }
+
+}
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java
new file mode 100644
index 0000000000..62f04761a9
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java
@@ -0,0 +1,436 @@
+/**
+ *
+ * Licensed under the Educational Community License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/ecl2.php
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package eu.dnetlib.maven.plugin.properties;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.plugin.MojoFailureException;
+import org.apache.maven.project.MavenProject;
+import org.springframework.core.io.DefaultResourceLoader;
+import org.springframework.core.io.Resource;
+import org.springframework.core.io.ResourceLoader;
+
+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
+
+/**
+ * Writes project properties for the keys listed in specified properties files.
+ * Based on:
+ * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
+ *
+ * @author mhorst
+ * @goal write-project-properties
+ */
+public class WritePredefinedProjectProperties extends AbstractMojo {
+
+ private static final String CR = "\r";
+ private static final String LF = "\n";
+ private static final String TAB = "\t";
+ protected static final String PROPERTY_PREFIX_ENV = "env.";
+ private static final String ENCODING_UTF8 = "utf8";
+
+ /**
+ * @parameter property="properties.includePropertyKeysFromFiles"
+ */
+ private String[] includePropertyKeysFromFiles;
+
+ /**
+ * @parameter default-value="${project}"
+ * @required
+ * @readonly
+ */
+ protected MavenProject project;
+
+ /**
+ * The file that properties will be written to
+ *
+ * @parameter property="properties.outputFile"
+ * default-value="${project.build.directory}/properties/project.properties";
+ * @required
+ */
+ protected File outputFile;
+
+ /**
+ * If true, the plugin will silently ignore any non-existent properties files, and the build will continue
+ *
+ * @parameter property="properties.quiet" default-value="true"
+ */
+ private boolean quiet;
+
+ /**
+ * Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed,
+ * tab=tab. Any other values are taken literally.
+ *
+ * @parameter default-value="cr,lf,tab" property="properties.escapeChars"
+ */
+ private String escapeChars;
+
+ /**
+ * If true, the plugin will include system properties when writing the properties file. System properties override
+ * both environment variables and project properties.
+ *
+ * @parameter default-value="false" property="properties.includeSystemProperties"
+ */
+ private boolean includeSystemProperties;
+
+ /**
+ * If true, the plugin will include environment variables when writing the properties file. Environment variables
+ * are prefixed with "env". Environment variables override project properties.
+ *
+ * @parameter default-value="false" property="properties.includeEnvironmentVariables"
+ */
+ private boolean includeEnvironmentVariables;
+
+ /**
+ * Comma separated set of properties to exclude when writing the properties file
+ *
+ * @parameter property="properties.exclude"
+ */
+ private String exclude;
+
+ /**
+ * Comma separated set of properties to write to the properties file. If provided, only the properties matching
+ * those supplied here will be written to the properties file.
+ *
+ * @parameter property="properties.include"
+ */
+ private String include;
+
+ /* (non-Javadoc)
+ * @see org.apache.maven.plugin.AbstractMojo#execute()
+ */
+ @Override
+ @SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"})
+ public void execute() throws MojoExecutionException, MojoFailureException {
+ Properties properties = new Properties();
+ // Add project properties
+ properties.putAll(project.getProperties());
+ if (includeEnvironmentVariables) {
+ // Add environment variables, overriding any existing properties with the same key
+ properties.putAll(getEnvironmentVariables());
+ }
+ if (includeSystemProperties) {
+ // Add system properties, overriding any existing properties with the same key
+ properties.putAll(System.getProperties());
+ }
+
+ // Remove properties as appropriate
+ trim(properties, exclude, include);
+
+ String comment = "# " + new Date() + "\n";
+        List<String> escapeTokens = getEscapeChars(escapeChars);
+
+ getLog().info("Creating " + outputFile);
+ writeProperties(outputFile, comment, properties, escapeTokens);
+ }
+
+ /**
+ * Provides environment variables.
+ * @return environment variables
+ */
+ protected static Properties getEnvironmentVariables() {
+ Properties props = new Properties();
+        for (Entry<String, String> entry : System.getenv().entrySet()) {
+ props.setProperty(PROPERTY_PREFIX_ENV + entry.getKey(), entry.getValue());
+ }
+ return props;
+ }
+
+ /**
+ * Removes properties which should not be written.
+ * @param properties
+ * @param omitCSV
+ * @param includeCSV
+ * @throws MojoExecutionException
+ */
+ protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException {
+        List<String> omitKeys = getListFromCSV(omitCSV);
+ for (String key : omitKeys) {
+ properties.remove(key);
+ }
+
+        List<String> includeKeys = getListFromCSV(includeCSV);
+// mh: including keys from predefined properties
+ if (includePropertyKeysFromFiles!=null && includePropertyKeysFromFiles.length>0) {
+ for (String currentIncludeLoc : includePropertyKeysFromFiles) {
+ if (validate(currentIncludeLoc)) {
+ Properties p = getProperties(currentIncludeLoc);
+ for (String key : p.stringPropertyNames()) {
+ includeKeys.add(key);
+ }
+ }
+ }
+ }
+ if (includeKeys!=null && !includeKeys.isEmpty()) {
+// removing only when include keys provided
+            Set<String> keys = properties.stringPropertyNames();
+ for (String key : keys) {
+ if (!includeKeys.contains(key)) {
+ properties.remove(key);
+ }
+ }
+ }
+ }
+
+ /**
+ * Checks whether file exists.
+ * @param location
+ * @return true when exists, false otherwise.
+ */
+ protected boolean exists(String location) {
+ if (StringUtils.isBlank(location)) {
+ return false;
+ }
+ File file = new File(location);
+ if (file.exists()) {
+ return true;
+ }
+ ResourceLoader loader = new DefaultResourceLoader();
+ Resource resource = loader.getResource(location);
+ return resource.exists();
+ }
+
+ /**
+ * Validates resource location.
+ * @param location
+ * @return true when valid, false otherwise
+ * @throws MojoExecutionException
+ */
+ protected boolean validate(String location) throws MojoExecutionException {
+ boolean exists = exists(location);
+ if (exists) {
+ return true;
+ }
+ if (quiet) {
+ getLog().info("Ignoring non-existent properties file '" + location + "'");
+ return false;
+ } else {
+ throw new MojoExecutionException("Non-existent properties file '" + location + "'");
+ }
+ }
+
+ /**
+ * Provides input stream.
+ * @param location
+ * @return input stream
+ * @throws IOException
+ */
+ protected InputStream getInputStream(String location) throws IOException {
+ File file = new File(location);
+ if (file.exists()) {
+ return new FileInputStream(location);
+ }
+ ResourceLoader loader = new DefaultResourceLoader();
+ Resource resource = loader.getResource(location);
+ return resource.getInputStream();
+ }
+
+ /**
+ * Creates properties for given location.
+ * @param location
+ * @return properties for given location
+ * @throws MojoExecutionException
+ */
+ protected Properties getProperties(String location) throws MojoExecutionException {
+ InputStream in = null;
+ try {
+ Properties properties = new Properties();
+ in = getInputStream(location);
+ if (location.toLowerCase().endsWith(".xml")) {
+ properties.loadFromXML(in);
+ } else {
+ properties.load(in);
+ }
+ return properties;
+ } catch (IOException e) {
+ throw new MojoExecutionException("Error reading properties file " + location, e);
+ } finally {
+ IOUtils.closeQuietly(in);
+ }
+ }
+
+ /**
+ * Provides escape characters.
+ * @param escapeChars
+ * @return escape characters
+ */
+    protected List<String> getEscapeChars(String escapeChars) {
+        List<String> tokens = getListFromCSV(escapeChars);
+        List<String> realTokens = new ArrayList<String>();
+ for (String token : tokens) {
+ String realToken = getRealToken(token);
+ realTokens.add(realToken);
+ }
+ return realTokens;
+ }
+
+ /**
+ * Provides real token.
+ * @param token
+ * @return real token
+ */
+ protected String getRealToken(String token) {
+ if (token.equalsIgnoreCase("CR")) {
+ return CR;
+ } else if (token.equalsIgnoreCase("LF")) {
+ return LF;
+ } else if (token.equalsIgnoreCase("TAB")) {
+ return TAB;
+ } else {
+ return token;
+ }
+ }
+
+ /**
+ * Returns content.
+ * @param comment
+ * @param properties
+ * @param escapeTokens
+ * @return content
+ */
+    protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
+        List<String> names = new ArrayList<String>(properties.stringPropertyNames());
+ Collections.sort(names);
+ StringBuilder sb = new StringBuilder();
+ if (!StringUtils.isBlank(comment)) {
+ sb.append(comment);
+ }
+ for (String name : names) {
+ String value = properties.getProperty(name);
+ String escapedValue = escape(value, escapeTokens);
+ sb.append(name + "=" + escapedValue + "\n");
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Writes properties to given file.
+ * @param file
+ * @param comment
+ * @param properties
+ * @param escapeTokens
+ * @throws MojoExecutionException
+ */
+    protected void writeProperties(File file, String comment, Properties properties, List<String> escapeTokens)
+ throws MojoExecutionException {
+ try {
+ String content = getContent(comment, properties, escapeTokens);
+ FileUtils.writeStringToFile(file, content, ENCODING_UTF8);
+ } catch (IOException e) {
+ throw new MojoExecutionException("Error creating properties file", e);
+ }
+ }
+
+ /**
+ * Escapes characters.
+ * @param s
+ * @param escapeChars
+ * @return
+ */
+    protected String escape(String s, List<String> escapeChars) {
+ String result = s;
+ for (String escapeChar : escapeChars) {
+ result = result.replace(escapeChar, getReplacementToken(escapeChar));
+ }
+ return result;
+ }
+
+ /**
+ * Provides replacement token.
+ * @param escapeChar
+ * @return replacement token
+ */
+ protected String getReplacementToken(String escapeChar) {
+ if (escapeChar.equals(CR)) {
+ return "\\r";
+ } else if (escapeChar.equals(LF)) {
+ return "\\n";
+ } else if (escapeChar.equals(TAB)) {
+ return "\\t";
+ } else {
+ return "\\" + escapeChar;
+ }
+ }
+
+ /**
+ * Returns list from csv.
+ * @param csv
+ * @return list of values generated from CSV
+ */
+    protected static final List<String> getListFromCSV(String csv) {
+        if (StringUtils.isBlank(csv)) {
+            return new ArrayList<String>();
+        }
+        List<String> list = new ArrayList<String>();
+ String[] tokens = StringUtils.split(csv, ",");
+ for (String token : tokens) {
+ list.add(token.trim());
+ }
+ return list;
+ }
+
+ public void setIncludeSystemProperties(boolean includeSystemProperties) {
+ this.includeSystemProperties = includeSystemProperties;
+ }
+
+ public void setEscapeChars(String escapeChars) {
+ this.escapeChars = escapeChars;
+ }
+
+ public void setIncludeEnvironmentVariables(boolean includeEnvironmentVariables) {
+ this.includeEnvironmentVariables = includeEnvironmentVariables;
+ }
+
+ public void setExclude(String exclude) {
+ this.exclude = exclude;
+ }
+
+ public void setInclude(String include) {
+ this.include = include;
+ }
+
+ public void setQuiet(boolean quiet) {
+ this.quiet = quiet;
+ }
+
+ /**
+ * Sets property files for which keys properties should be included.
+ * @param includePropertyKeysFromFiles
+ */
+ public void setIncludePropertyKeysFromFiles(
+ String[] includePropertyKeysFromFiles) {
+ if (includePropertyKeysFromFiles!=null) {
+ this.includePropertyKeysFromFiles = Arrays.copyOf(
+ includePropertyKeysFromFiles,
+ includePropertyKeysFromFiles.length);
+ }
+ }
+
+}
\ No newline at end of file
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
new file mode 100644
index 0000000000..8a763c1bdd
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java
@@ -0,0 +1,101 @@
+package eu.dnetlib.maven.plugin.properties;
+
+import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
+import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * @author mhorst
+ *
+ */
+public class GenerateOoziePropertiesMojoTest {
+
+ private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
+
+ @Before
+ public void clearSystemProperties() {
+ System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
+ System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
+ }
+
+ @Test
+ public void testExecuteEmpty() throws Exception {
+ // execute
+ mojo.execute();
+
+ // assert
+ assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+ }
+
+ @Test
+ public void testExecuteSandboxNameAlreadySet() throws Exception {
+ // given
+ String workflowSourceDir = "eu/dnetlib/iis/wf/transformers";
+ String sandboxName = "originalSandboxName";
+ System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
+ System.setProperty(PROPERTY_NAME_SANDBOX_NAME, sandboxName);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertEquals(sandboxName, System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+ }
+
+ @Test
+ public void testExecuteEmptyWorkflowSourceDir() throws Exception {
+ // given
+ String workflowSourceDir = "";
+ System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+ }
+
+ @Test
+ public void testExecuteNullSandboxNameGenerated() throws Exception {
+ // given
+ String workflowSourceDir = "eu/dnetlib/iis/";
+ System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+ }
+
+ @Test
+ public void testExecute() throws Exception {
+ // given
+ String workflowSourceDir = "eu/dnetlib/iis/wf/transformers";
+ System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+ }
+
+ @Test
+ public void testExecuteWithoutRoot() throws Exception {
+ // given
+ String workflowSourceDir = "wf/transformers";
+ System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
+ }
+
+}
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
new file mode 100644
index 0000000000..51d9575ffd
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java
@@ -0,0 +1,365 @@
+package eu.dnetlib.maven.plugin.properties;
+
+import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.doReturn;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.project.MavenProject;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.runners.MockitoJUnitRunner;
+
+
+/**
+ * @author mhorst
+ *
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class WritePredefinedProjectPropertiesTest {
+
+ @Rule
+ public TemporaryFolder testFolder = new TemporaryFolder();
+
+ @Mock
+ private MavenProject mavenProject;
+
+ private WritePredefinedProjectProperties mojo;
+
+ @Before
+ public void init() {
+ mojo = new WritePredefinedProjectProperties();
+ mojo.outputFile = getPropertiesFileLocation();
+ mojo.project = mavenProject;
+ doReturn(new Properties()).when(mavenProject).getProperties();
+ }
+
+ // ----------------------------------- TESTS ---------------------------------------------
+
+ @Test
+ public void testExecuteEmpty() throws Exception {
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(0, storedProperties.size());
+ }
+
+ @Test
+ public void testExecuteWithProjectProperties() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(1, storedProperties.size());
+ assertTrue(storedProperties.containsKey(key));
+ assertEquals(value, storedProperties.getProperty(key));
+ }
+
+ @Test(expected=MojoExecutionException.class)
+ public void testExecuteWithProjectPropertiesAndInvalidOutputFile() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+ mojo.outputFile = testFolder.getRoot();
+
+ // execute
+ mojo.execute();
+ }
+
+ @Test
+ public void testExecuteWithProjectPropertiesExclusion() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String excludedKey = "excludedPropertyKey";
+ String excludedValue = "excludedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(excludedKey, excludedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+ mojo.setExclude(excludedKey);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(1, storedProperties.size());
+ assertTrue(storedProperties.containsKey(key));
+ assertEquals(value, storedProperties.getProperty(key));
+ }
+
+ @Test
+ public void testExecuteWithProjectPropertiesInclusion() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String includedKey = "includedPropertyKey";
+ String includedValue = "includedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(includedKey, includedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+ mojo.setInclude(includedKey);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(1, storedProperties.size());
+ assertTrue(storedProperties.containsKey(includedKey));
+ assertEquals(includedValue, storedProperties.getProperty(includedKey));
+ }
+
+ @Test
+ public void testExecuteIncludingPropertyKeysFromFile() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String includedKey = "includedPropertyKey";
+ String includedValue = "includedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(includedKey, includedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+
+ File includedPropertiesFile = new File(testFolder.getRoot(), "included.properties");
+ Properties includedProperties = new Properties();
+ includedProperties.setProperty(includedKey, "irrelevantValue");
+ includedProperties.store(new FileWriter(includedPropertiesFile), null);
+
+ mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(1, storedProperties.size());
+ assertTrue(storedProperties.containsKey(includedKey));
+ assertEquals(includedValue, storedProperties.getProperty(includedKey));
+ }
+
+ @Test
+ public void testExecuteIncludingPropertyKeysFromClasspathResource() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String includedKey = "includedPropertyKey";
+ String includedValue = "includedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(includedKey, includedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+
+ mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(1, storedProperties.size());
+ assertTrue(storedProperties.containsKey(includedKey));
+ assertEquals(includedValue, storedProperties.getProperty(includedKey));
+ }
+
+ @Test(expected=MojoExecutionException.class)
+ public void testExecuteIncludingPropertyKeysFromBlankLocation() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String includedKey = "includedPropertyKey";
+ String includedValue = "includedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(includedKey, includedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+
+ mojo.setIncludePropertyKeysFromFiles(new String[] {""});
+
+ // execute
+ mojo.execute();
+ }
+
+ @Test
+ public void testExecuteIncludingPropertyKeysFromXmlFile() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String includedKey = "includedPropertyKey";
+ String includedValue = "includedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(includedKey, includedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+
+ File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml");
+ Properties includedProperties = new Properties();
+ includedProperties.setProperty(includedKey, "irrelevantValue");
+ includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null);
+
+ mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(1, storedProperties.size());
+ assertTrue(storedProperties.containsKey(includedKey));
+ assertEquals(includedValue, storedProperties.getProperty(includedKey));
+ }
+
+ @Test(expected=MojoExecutionException.class)
+ public void testExecuteIncludingPropertyKeysFromInvalidXmlFile() throws Exception {
+ // given
+ String key = "projectPropertyKey";
+ String value = "projectPropertyValue";
+ String includedKey = "includedPropertyKey";
+ String includedValue = "includedPropertyValue";
+ Properties projectProperties = new Properties();
+ projectProperties.setProperty(key, value);
+ projectProperties.setProperty(includedKey, includedValue);
+ doReturn(projectProperties).when(mavenProject).getProperties();
+
+ File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml");
+ Properties includedProperties = new Properties();
+ includedProperties.setProperty(includedKey, "irrelevantValue");
+ includedProperties.store(new FileOutputStream(includedPropertiesFile), null);
+
+ mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
+
+ // execute
+ mojo.execute();
+ }
+
+ @Test
+ public void testExecuteWithQuietModeOn() throws Exception {
+ // given
+ mojo.setQuiet(true);
+ mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"});
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertEquals(0, storedProperties.size());
+ }
+
+ @Test(expected=MojoExecutionException.class)
+ public void testExecuteIncludingPropertyKeysFromInvalidFile() throws Exception {
+ // given
+ mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"});
+
+ // execute
+ mojo.execute();
+ }
+
+ @Test
+ public void testExecuteWithEnvironmentProperties() throws Exception {
+ // given
+ mojo.setIncludeEnvironmentVariables(true);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertTrue(storedProperties.size() > 0);
+ for (Object currentKey : storedProperties.keySet()) {
+ assertTrue(((String)currentKey).startsWith(PROPERTY_PREFIX_ENV));
+ }
+ }
+
+ @Test
+ public void testExecuteWithSystemProperties() throws Exception {
+ // given
+ String key = "systemPropertyKey";
+ String value = "systemPropertyValue";
+ System.setProperty(key, value);
+ mojo.setIncludeSystemProperties(true);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertTrue(storedProperties.size() > 0);
+ assertTrue(storedProperties.containsKey(key));
+ assertEquals(value, storedProperties.getProperty(key));
+ }
+
+ @Test
+ public void testExecuteWithSystemPropertiesAndEscapeChars() throws Exception {
+ // given
+ String key = "systemPropertyKey ";
+ String value = "systemPropertyValue";
+ System.setProperty(key, value);
+ mojo.setIncludeSystemProperties(true);
+ String escapeChars = "cr,lf,tab,|";
+ mojo.setEscapeChars(escapeChars);
+
+ // execute
+ mojo.execute();
+
+ // assert
+ assertTrue(mojo.outputFile.exists());
+ Properties storedProperties = getStoredProperties();
+ assertTrue(storedProperties.size() > 0);
+ assertFalse(storedProperties.containsKey(key));
+ assertTrue(storedProperties.containsKey(key.trim()));
+ assertEquals(value, storedProperties.getProperty(key.trim()));
+ }
+
+ // ----------------------------------- PRIVATE -------------------------------------------
+
+ private File getPropertiesFileLocation() {
+ return new File(testFolder.getRoot(), "test.properties");
+ }
+
+ private Properties getStoredProperties() throws FileNotFoundException, IOException {
+ Properties properties = new Properties();
+ properties.load(new FileInputStream(getPropertiesFileLocation()));
+ return properties;
+ }
+}
diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties
new file mode 100644
index 0000000000..3c79fe6cb2
--- /dev/null
+++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties
@@ -0,0 +1 @@
+includedPropertyKey=irrelevantValue
\ No newline at end of file
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
new file mode 100644
index 0000000000..a930af4eab
--- /dev/null
+++ b/dhp-build/pom.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+    <artifactId>dhp-build</artifactId>
+    <packaging>pom</packaging>
+    <modules>
+        <module>dhp-build-assembly-resources</module>
+        <module>dhp-build-properties-maven-plugin</module>
+    </modules>
+</project>
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
new file mode 100644
index 0000000000..42b8864fa6
--- /dev/null
+++ b/dhp-common/pom.xml
@@ -0,0 +1,177 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>dhp-common</artifactId>
+    <packaging>jar</packaging>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>dhp-schemas</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.oozie</groupId>
+            <artifactId>oozie-core</artifactId>
+            <scope>provided</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-mapreduce-client-core</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.10</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_2.10</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro-mapred</artifactId>
+            <classifier>hadoop2</classifier>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-beans</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.beust</groupId>
+            <artifactId>jcommander</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.pig</groupId>
+            <artifactId>pig</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.linkedin.datafu</groupId>
+            <artifactId>datafu</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-beanutils</groupId>
+            <artifactId>commons-beanutils</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.jdom</groupId>
+            <artifactId>jdom</artifactId>
+        </dependency>
+
+    </dependencies>
+
+    <build>
+        <plugins>
+
+            <plugin>
+                <groupId>net.alchim31.maven</groupId>
+                <artifactId>scala-maven-plugin</artifactId>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.avro</groupId>
+                <artifactId>avro-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>generate-test-sources</phase>
+                        <goals>
+                            <goal>schema</goal>
+                            <goal>idl-protocol</goal>
+                        </goals>
+                        <configuration>
+                            <stringType>String</stringType>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>add-test-sources</id>
+                        <phase>generate-test-sources</phase>
+                        <goals>
+                            <goal>add-test-source</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>test-jar</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <excludedGroups>eu.dnetlib.iis.common.IntegrationTest</excludedGroups>
+                </configuration>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-failsafe-plugin</artifactId>
+            </plugin>
+
+        </plugins>
+    </build>
+
+</project>
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java
new file mode 100644
index 0000000000..7fbcd8fef8
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java
@@ -0,0 +1,106 @@
+package eu.dnetlib.dhp.common;
+
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.LinkedList;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
+import org.springframework.beans.BeanUtils;
+import org.springframework.util.ClassUtils;
+import org.springframework.util.ReflectionUtils;
+
+/**
+ * Extracted from:
+ * https://github.com/spring-projects/spring-hadoop/blob/master/spring-hadoop-core/src/main/java/org/springframework/data/hadoop/fs/FsShellPermissions.java
+ *
+ * Utility class for accessing Hadoop FsShellPermissions (which is not public)
+ * without having to duplicate its code.
+ * @author Costin Leau
+ *
+ */
+public class FsShellPermissions {
+
+ private static boolean IS_HADOOP_20X = ClassUtils.isPresent("org.apache.hadoop.fs.FsShellPermissions$Chmod",
+ FsShellPermissions.class.getClassLoader());
+
+ public enum Op {
+ CHOWN("-chown"), CHMOD("-chmod"), CHGRP("-chgrp");
+
+ private final String cmd;
+
+ Op(String cmd) {
+ this.cmd = cmd;
+ }
+
+ public String getCmd() {
+ return cmd;
+ }
+ }
+
+ // TODO: move this into Spring Core (but add JDK 1.5 compatibility first)
+ @SafeVarargs
+    static <T> T[] concatAll(T[] first, T[]... rest) {
+ // can add some sanity checks
+ int totalLength = first.length;
+ for (T[] array : rest) {
+ totalLength += array.length;
+ }
+ T[] result = Arrays.copyOf(first, totalLength);
+ int offset = first.length;
+ for (T[] array : rest) {
+ System.arraycopy(array, 0, result, offset, array.length);
+ offset += array.length;
+ }
+ return result;
+ }
+
+ public static void changePermissions(FileSystem fs, Configuration config,
+ Op op, boolean recursive, String group, String uri) {
+ changePermissions(fs, config, op, recursive, group, new String[] {uri});
+ }
+
+ public static void changePermissions(FileSystem fs, Configuration config,
+ Op op, boolean recursive, String group, String... uris) {
+ String[] argvs;
+ if (recursive) {
+ argvs = new String[1];
+ argvs[0] = "-R";
+ } else {
+ argvs = new String[0];
+ }
+ argvs = concatAll(argvs, new String[] { group }, uris);
+
+ // Hadoop 1.0.x
+ if (!IS_HADOOP_20X) {
+            Class<?> cls = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions", config.getClass().getClassLoader());
+ Object[] args = new Object[] { fs, op.getCmd(), argvs, 0, new FsShell(config) };
+
+ Method m = ReflectionUtils.findMethod(cls, "changePermissions", FileSystem.class, String.class, String[].class, int.class, FsShell.class);
+ ReflectionUtils.makeAccessible(m);
+ ReflectionUtils.invokeMethod(m, null, args);
+ }
+ // Hadoop 2.x
+ else {
+            Class<?> cmd = ClassUtils.resolveClassName("org.apache.hadoop.fs.shell.Command", config.getClass().getClassLoader());
+            Class<?> targetClz = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions$Chmod", config.getClass().getClassLoader());
+ Configurable target = (Configurable) BeanUtils.instantiate(targetClz);
+ target.setConf(config);
+ // run(String...) swallows the exceptions - re-implement it here
+ //
+            LinkedList<String> args = new LinkedList<String>(Arrays.asList(argvs));
+ try {
+ Method m = ReflectionUtils.findMethod(cmd, "processOptions", LinkedList.class);
+ ReflectionUtils.makeAccessible(m);
+ ReflectionUtils.invokeMethod(m, target, args);
+ m = ReflectionUtils.findMethod(cmd, "processRawArguments", LinkedList.class);
+ ReflectionUtils.makeAccessible(m);
+ ReflectionUtils.invokeMethod(m, target, args);
+            } catch (IllegalStateException ex) {
+                throw new RuntimeException("Cannot change permissions/ownership", ex);
+ }
+ }
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java
new file mode 100644
index 0000000000..1ce7cd4266
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java
@@ -0,0 +1,75 @@
+package eu.dnetlib.dhp.common;
+
+import java.io.UnsupportedEncodingException;
+
+/**
+ * InfoSpace constants.
+ *
+ * @author mhorst
+ *
+ */
+public final class InfoSpaceConstants {
+
+ public static final float CONFIDENCE_TO_TRUST_LEVEL_FACTOR = 0.9f;
+
+ public static final String ENCODING_UTF8 = "utf-8";
+
+ public static final char ROW_PREFIX_SEPARATOR = '|';
+
+ public static final String ID_NAMESPACE_SEPARATOR = "::";
+ public static final String CLASSIFICATION_HIERARCHY_SEPARATOR = ID_NAMESPACE_SEPARATOR;
+ public static final String INFERENCE_PROVENANCE_SEPARATOR = ID_NAMESPACE_SEPARATOR;
+
+ public static final String ROW_PREFIX_RESULT = "50|";
+ public static final String ROW_PREFIX_PROJECT = "40|";
+ public static final String ROW_PREFIX_PERSON = "30|";
+ public static final String ROW_PREFIX_ORGANIZATION = "20|";
+ public static final String ROW_PREFIX_DATASOURCE = "10|";
+
+ public static final String QUALIFIER_BODY_STRING = "body";
+ public static final byte[] QUALIFIER_BODY;
+
+ public static final String SEMANTIC_CLASS_MAIN_TITLE = "main title";
+ public static final String SEMANTIC_CLASS_PUBLICATION = "publication";
+ public static final String SEMANTIC_CLASS_UNKNOWN = "UNKNOWN";
+
+ public static final String SEMANTIC_SCHEME_DNET_PERSON_ROLES = "dnet:personroles";
+ public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT = "dnet:result_result_relations";
+ public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_PROJECT = "dnet:result_project_relations";
+
+ public static final String SEMANTIC_SCHEME_DNET_TITLE = "dnet:dataCite_title";
+ public static final String SEMANTIC_SCHEME_DNET_TITLE_TYPOLOGIES = "dnet:title_typologies";
+ public static final String SEMANTIC_SCHEME_DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
+ public static final String SEMANTIC_SCHEME_DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
+ public static final String SEMANTIC_SCHEME_DNET_LANGUAGES = "dnet:languages";
+ public static final String SEMANTIC_SCHEME_DNET_PID_TYPES = "dnet:pid_types";
+ public static final String SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES = "dnet:subject_classification_typologies";
+
+ // resultResult citation and similarity related
+ public static final String SEMANTIC_SCHEME_DNET_DATASET_PUBLICATION_RELS = "dnet:dataset_publication_rels";
+
+ public static final String SEMANTIC_CLASS_TAXONOMIES_ARXIV = "arxiv";
+ public static final String SEMANTIC_CLASS_TAXONOMIES_WOS = "wos";
+ public static final String SEMANTIC_CLASS_TAXONOMIES_DDC = "ddc";
+ public static final String SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC = "mesheuropmc";
+ public static final String SEMANTIC_CLASS_TAXONOMIES_ACM = "acm";
+
+ public static final String EXTERNAL_ID_TYPE_INSTANCE_URL = "dnet:instance-url";
+ public static final String EXTERNAL_ID_TYPE_UNKNOWN = "unknown";
+
+ // publication types class ids
+ public static final String SEMANTIC_CLASS_INSTANCE_TYPE_ARTICLE = "0001";
+ public static final String SEMANTIC_CLASS_INSTANCE_TYPE_DATASET = "0021";
+
+ static {
+ try {
+ QUALIFIER_BODY = QUALIFIER_BODY_STRING.getBytes(ENCODING_UTF8);
+
+ } catch (UnsupportedEncodingException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private InfoSpaceConstants() {
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java
new file mode 100644
index 0000000000..e71d69027b
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java
@@ -0,0 +1,74 @@
+package eu.dnetlib.dhp.common;
+
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Utility class holding parameter names and methods simplifying access to parameters from hadoop context.
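+ * <p>
+ * A usage sketch (assuming a Hadoop task context named {@code context}):
+ * <pre>
+ * Integer readTimeout = WorkflowRuntimeParameters.getIntegerParamValue(
+ *         DNET_SERVICE_CLIENT_READ_TIMEOUT, context.getConfiguration());
+ * </pre>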
+ * @author mhorst
+ *
+ */
+public final class WorkflowRuntimeParameters {
+
+ public static final String OOZIE_ACTION_OUTPUT_FILENAME = "oozie.action.output.properties";
+
+ public static final char DEFAULT_CSV_DELIMITER = ',';
+
+ public static final String UNDEFINED_NONEMPTY_VALUE = "$UNDEFINED$";
+
+ // default values
+ public static final String DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE = "60000";
+ public static final String DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000";
+ // parameter names
+ public static final String DNET_SERVICE_CLIENT_READ_TIMEOUT = "dnet.service.client.read.timeout";
+ public static final String DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT = "dnet.service.client.connection.timeout";
+
+ // ----------------- CONSTRUCTORS -----------------------------
+
+ private WorkflowRuntimeParameters() {}
+
+ /**
+ * Retrieves a parameter from hadoop context configuration when set to a value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}.
+ */
+ public static String getParamValue(String paramName, Configuration configuration) {
+ String paramValue = configuration.get(paramName);
+ if (StringUtils.isNotBlank(paramValue) && !UNDEFINED_NONEMPTY_VALUE.equals(paramValue)) {
+ return paramValue;
+ } else {
+ return null;
+ }
+ }
+
+ /**
+ * Retrieves an {@link Integer} parameter from hadoop context configuration when set to a non-empty value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}.
+ * Null is returned when the parameter was not set.
+ * @throws NumberFormatException if the parameter value does not contain a parsable integer
+ */
+ public static Integer getIntegerParamValue(String paramName, Configuration configuration) throws NumberFormatException {
+ String paramValue = getParamValue(paramName, configuration);
+ return paramValue!=null?Integer.valueOf(paramValue):null;
+ }
+
+ /**
+ * Retrieves a parameter from hadoop context configuration when set to a value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}.
+ * If the requested parameter was not set, the fallback parameter is retrieved using the same logic.
+ */
+ public static String getParamValue(String paramName, String fallbackParamName, Configuration configuration) {
+ String resultCandidate = getParamValue(paramName, configuration);
+ return resultCandidate!=null?resultCandidate:getParamValue(fallbackParamName, configuration);
+ }
+
+ /**
+ * Provides parameter value. Returns default value when entry not found among parameters.
+ *
+ * @param paramName parameter name
+ * @param defaultValue parameter default value to be returned when entry not found among parameters
+ * @param parameters map of parameters
+ */
+ public static String getParamValue(String paramName, String defaultValue, Map<String, String> parameters) {
+ return parameters.containsKey(paramName)?parameters.get(paramName):defaultValue;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java
new file mode 100644
index 0000000000..5b37d31f17
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java
@@ -0,0 +1,111 @@
+package eu.dnetlib.dhp.common.counter;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Maps;
+
+/**
+ * Class that groups several counters which are identified by name (String value).
+ *
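+ * A minimal usage sketch (the counter name is illustrative):
+ * <pre>
+ * NamedCounters counters = new NamedCounters(new String[] {"processed"});
+ * counters.increment("processed");
+ * long current = counters.currentValue("processed"); // 1
+ * </pre>
+ *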
+ * @author madryk
+ */
+public class NamedCounters implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+
+ private final Map<String, Long> counters;
+
+
+ //------------------------ CONSTRUCTORS --------------------------
+
+ /**
+ * Creates {@link NamedCounters} with empty initial counters.
+ */
+ public NamedCounters() {
+ this.counters = Maps.newHashMap();
+ }
+
+ /**
+ * Creates {@link NamedCounters} with initial counters.
+ * Starting value of initial counters is zero.
+ *
+ * @param initialCounterNames - names of initial counters
+ */
+ public NamedCounters(String[] initialCounterNames) {
+ Preconditions.checkNotNull(initialCounterNames);
+
+ this.counters = Maps.newHashMap();
+
+ for (String initialCounterName : initialCounterNames) {
+ this.counters.put(initialCounterName, 0L);
+ }
+ }
+
+ /**
+ * Creates {@link NamedCounters} with initial counters.
+ * Starting value of initial counters is zero.
+ *
+ * @param initialCounterNamesEnumClass - enum class providing names of initial counters
+ */
+ public <E extends Enum<E>> NamedCounters(Class<E> initialCounterNamesEnumClass) {
+ Preconditions.checkNotNull(initialCounterNamesEnumClass);
+
+ this.counters = Maps.newHashMap();
+ Enum<?>[] enumConstants = initialCounterNamesEnumClass.getEnumConstants();
+
+ for (int i = 0; i < enumConstants.length; ++i) {
+ this.counters.put(enumConstants[i].name(), 0L);
+ }
+ }
+
+
+ //------------------------ LOGIC --------------------------
+
+ /**
+ * Increments value of a counter with the name specified as parameter by 1.
+ * Internally uses {@link #increment(String, Long)}
+ */
+ public void increment(String counterName) {
+ increment(counterName, 1L);
+ }
+
+ /**
+ * Increments value of a counter with the name specified as parameter by the given value.
+ * If current instance of {@link NamedCounters} does not contain counter
+ * with provided name, then before incrementing counter will be created with starting
+ * value equal to zero.
+ */
+ public void increment(String counterName, Long incrementValue) {
+
+ long oldValue = counters.getOrDefault(counterName, 0L);
+ counters.put(counterName, oldValue + incrementValue);
+ }
+
+ /**
+ * Returns current value of a counter with the name specified as parameter.
+ *
+ * @throws IllegalArgumentException when {@link NamedCounters} does not contain counter
+ * with provided name
+ */
+ public long currentValue(String counterName) {
+
+ if (!counters.containsKey(counterName)) {
+ throw new IllegalArgumentException("Couldn't find counter with name: " + counterName);
+ }
+
+ return counters.get(counterName);
+ }
+
+ /**
+ * Returns names of currently tracked counters.
+ */
+ public Collection counterNames() {
+ return counters.keySet();
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java
new file mode 100644
index 0000000000..6686432dd7
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java
@@ -0,0 +1,48 @@
+package eu.dnetlib.dhp.common.counter;
+
+import org.apache.spark.AccumulableParam;
+
+import scala.Tuple2;
+
+/**
+ * Spark {@link AccumulableParam} for tracking multiple counter values using {@link NamedCounters}.
+ *
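+ * A wiring sketch, assuming a Spark 1.x {@code JavaSparkContext} named {@code sc}:
+ * <pre>
+ * Accumulable&lt;NamedCounters, Tuple2&lt;String, Long&gt;&gt; counters =
+ *         sc.accumulable(new NamedCounters(), new NamedCountersAccumulableParam());
+ * counters.add(new Tuple2&lt;String, Long&gt;("processed", 1L));
+ * </pre>
+ *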
+ * @author madryk
+ */
+public class NamedCountersAccumulableParam implements AccumulableParam<NamedCounters, Tuple2<String, Long>> {
+
+ private static final long serialVersionUID = 1L;
+
+
+ //------------------------ LOGIC --------------------------
+
+ /**
+ * Increments {@link NamedCounters} counter with the name same as the first element of passed incrementValue tuple
+ * by value defined in the second element of incrementValue tuple.
+ */
+ @Override
+ public NamedCounters addAccumulator(NamedCounters counters, Tuple2<String, Long> incrementValue) {
+ counters.increment(incrementValue._1, incrementValue._2);
+ return counters;
+ }
+
+ /**
+ * Merges two passed {@link NamedCounters}.
+ */
+ @Override
+ public NamedCounters addInPlace(NamedCounters counters1, NamedCounters counters2) {
+ for (String counterName2 : counters2.counterNames()) {
+ counters1.increment(counterName2, counters2.currentValue(counterName2));
+ }
+ return counters1;
+ }
+
+ /**
+ * Returns passed initialCounters value without any modifications.
+ */
+ @Override
+ public NamedCounters zero(NamedCounters initialCounters) {
+ return initialCounters;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java
new file mode 100644
index 0000000000..bebb82b6e0
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java
@@ -0,0 +1,52 @@
+package eu.dnetlib.dhp.common.counter;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Properties;
+
+/**
+ * Writer of {@link NamedCounters} object into a properties file.
+ *
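+ * Each counter becomes a single property: the counter name is the key and
+ * its value is the count, so a counter "processed" with value 10 is stored
+ * as {@code processed=10}.
+ *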
+ * @author madryk
+ */
+public class NamedCountersFileWriter {
+
+
+ //------------------------ LOGIC --------------------------
+
+ /**
+ * Writes {@link NamedCounters} as a properties file located under
+ * provided filePath.
+ *
+ * @throws IOException if writing to properties file resulted in an error
+ */
+ public void writeCounters(NamedCounters counters, String filePath) throws IOException {
+
+ Properties counterProperties = buildPropertiesFromCounters(counters);
+
+ File file = new File(filePath);
+ try (OutputStream os = new FileOutputStream(file)) {
+
+ counterProperties.store(os, null);
+
+ }
+
+ }
+
+
+ //------------------------ PRIVATE --------------------------
+
+ private Properties buildPropertiesFromCounters(NamedCounters counters) {
+
+ Properties properties = new Properties();
+
+ for (String counterName : counters.counterNames()) {
+ long count = counters.currentValue(counterName);
+ properties.put(counterName, String.valueOf(count));
+ }
+
+ return properties;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java
new file mode 100644
index 0000000000..bcc6494b2a
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java
@@ -0,0 +1,67 @@
+package eu.dnetlib.dhp.common.fault;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import eu.dnetlib.dhp.audit.schemas.Cause;
+import eu.dnetlib.dhp.audit.schemas.Fault;
+
+/**
+ * {@link Fault} related utilities.
+ * @author mhorst
+ *
+ */
+public final class FaultUtils {
+
+ // ---------------------- CONSTRUCTORS -------------------
+
+ private FaultUtils() {}
+
+ // ---------------------- LOGIC --------------------------
+
+ /**
+ * Generates {@link Fault} instance based on {@link Throwable}.
+ * @param entityId entity identifier
+ * @param throwable throwable being the source of the fault
+ * @param auditSupplementaryData optional supplementary audit data
+ * @return {@link Fault} instance generated for {@link Throwable}
+ */
+ public static Fault exceptionToFault(CharSequence entityId, Throwable throwable,
+ Map<CharSequence, CharSequence> auditSupplementaryData) {
+ Fault.Builder faultBuilder = Fault.newBuilder();
+ faultBuilder.setInputObjectId(entityId);
+ faultBuilder.setTimestamp(System.currentTimeMillis());
+ faultBuilder.setCode(throwable.getClass().getName());
+ faultBuilder.setMessage(throwable.getMessage());
+ StringWriter strWriter = new StringWriter();
+ PrintWriter pw = new PrintWriter(strWriter);
+ throwable.printStackTrace(pw);
+ pw.close();
+ faultBuilder.setStackTrace(strWriter.toString());
+ if (throwable.getCause()!=null) {
+ faultBuilder.setCauses(appendThrowableToCauses(
+ throwable.getCause(), new ArrayList<Cause>()));
+ }
+ if (auditSupplementaryData!=null && !auditSupplementaryData.isEmpty()) {
+ faultBuilder.setSupplementaryData(auditSupplementaryData);
+ }
+ return faultBuilder.build();
+ }
+
+ protected static List<Cause> appendThrowableToCauses(Throwable e, List<Cause> causes) {
+ Cause.Builder causeBuilder = Cause.newBuilder();
+ causeBuilder.setCode(e.getClass().getName());
+ causeBuilder.setMessage(e.getMessage());
+ causes.add(causeBuilder.build());
+ if (e.getCause()!=null) {
+ return appendThrowableToCauses(
+ e.getCause(),causes);
+ } else {
+ return causes;
+ }
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java
new file mode 100644
index 0000000000..b6106044b7
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java
@@ -0,0 +1,98 @@
+package eu.dnetlib.dhp.common.java;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ *
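+ * Parses command line options of workflow {@link Process} nodes. An
+ * illustrative invocation (class and port names are hypothetical):
+ * <pre>
+ * some.package.SomeProcess -Iperson=hdfs:///input/person -Omerged=hdfs:///output/merged -PsomeParam=someValue
+ * </pre>
+ *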
+ * @author Mateusz Kobos
+ *
+ */
+@SuppressWarnings("deprecation")
+public final class CmdLineParser {
+ /** HACK: make the names of various types of parameters of the program
+ * more readable, e.g. "--Input_person=..." instead of "-Iperson=...",
+ * "--Output_merged=..." instead of "-Omerged=...". I wasn't able to
+ * get such notation so far using the Apache CLI. */
+ public static final String constructorPrefix = "C";
+ public static final String inputPrefix = "I";
+ public static final String outputPrefix = "O";
+ public static final String specialParametersPrefix = "S";
+ /** HACK: This field should be removed since this list of special
+ * parameters is empty, thus not used anywhere.*/
+ public static final String[] mandatorySpecialParameters = new String[]{};
+ public static final String processParametersPrefix = "P";
+
+ // ------------------------- CONSTRUCTORS ------------------------------
+
+ private CmdLineParser() {}
+
+ // ------------------------- LOGIC -------------------------------------
+
+ public static CommandLine parse(String[] args) {
+ Options options = new Options();
+ @SuppressWarnings("static-access")
+ Option constructorParams = OptionBuilder.withArgName("STRING")
+ .hasArg()
+ .withDescription("Constructor parameter")
+ .withLongOpt("ConstructorParam")
+ .create(constructorPrefix);
+ options.addOption(constructorParams);
+ @SuppressWarnings("static-access")
+ Option inputs = OptionBuilder.withArgName("portName=URI")
+ .hasArgs(2)
+ .withValueSeparator()
+ .withDescription("Path binding for a given input port")
+ .withLongOpt("Input")
+ .create(inputPrefix);
+ options.addOption(inputs);
+ @SuppressWarnings("static-access")
+ Option outputs = OptionBuilder.withArgName("portName=URI")
+ .hasArgs(2)
+ .withValueSeparator()
+ .withDescription("Path binding for a given output port")
+ .create(outputPrefix);
+ options.addOption(outputs);
+ @SuppressWarnings("static-access")
+ Option specialParameter = OptionBuilder.withArgName("parameter_name=string")
+ .hasArgs(2)
+ .withValueSeparator()
+ .withDescription(String.format("Value of special parameter. "
+ + "These are the mandatory parameters={%s}",
+ StringUtils.join(mandatorySpecialParameters, ",")))
+ .create(specialParametersPrefix);
+ options.addOption(specialParameter);
+ @SuppressWarnings("static-access")
+ Option otherParameter = OptionBuilder.withArgName("parameter_name=string")
+ .hasArgs(2)
+ .withValueSeparator()
+ .withDescription("Value of some other parameter.")
+ .create(processParametersPrefix);
+ options.addOption(otherParameter);
+
+ Option help = new Option("help", "print this message");
+ options.addOption(help);
+
+ CommandLineParser parser = new GnuParser();
+ try {
+ CommandLine cmdLine = parser.parse(options, args);
+ if(cmdLine.hasOption("help")){
+ HelpFormatter formatter = new HelpFormatter();
+ formatter.printHelp("", options);
+ System.exit(1);
+ }
+ return cmdLine;
+ } catch (ParseException e) {
+ throw new CmdLineParserException("Parsing command line arguments failed", e);
+ }
+
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java
new file mode 100644
index 0000000000..bbcad8d84a
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java
@@ -0,0 +1,21 @@
+package eu.dnetlib.dhp.common.java;
+
+/**
+ * Command line parsing exception
+ * @author Mateusz Kobos
+ *
+ */
+public class CmdLineParserException extends RuntimeException {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 9219928547611876284L;
+
+ public CmdLineParserException(String message){
+ super(message);
+ }
+
+ public CmdLineParserException(String message, Throwable cause){
+ super(message, cause);
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java
new file mode 100644
index 0000000000..2c65d08926
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java
@@ -0,0 +1,45 @@
+package eu.dnetlib.dhp.common.java;
+
+import java.lang.reflect.Constructor;
+
+import org.apache.commons.cli.CommandLine;
+
+/**
+ * Handles parsing the command line arguments provided by the Oozie
+ * to create a {@link Process}
+ * @author Mateusz Kobos
+ *
+ */
+public class CmdLineParserForProcessConstruction {
+ public Process run(CommandLine cmdLine){
+ String[] args = cmdLine.getArgs();
+ if(args.length != 1){
+ throw new CmdLineParserException("The name of the class has "+
+ "to be specified as the first agrument");
+ }
+ String className = args[0];
+
+ String[] constructorParams = cmdLine.getOptionValues(
+ CmdLineParser.constructorPrefix);
+ if(constructorParams == null){
+ constructorParams = new String[0];
+ }
+ try {
+ Class> processClass = Class.forName(className);
+ Constructor> processConstructor = null;
+ if(constructorParams.length == 0){
+ try{
+ processConstructor = processClass.getConstructor();
+ return (Process) processConstructor.newInstance();
+ } catch(NoSuchMethodException ex){
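+ // ignored on purpose: fall back to the String[] constructor below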
+ }
+ }
+ processConstructor = processClass.getConstructor(String[].class);
+ return (Process) processConstructor.newInstance(
+ (Object)constructorParams);
+ } catch (Exception e) {
+ throw new CmdLineParserException(String.format(
+ "Problem while creating class \"%s\"", className), e);
+ }
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java
new file mode 100644
index 0000000000..31db331032
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java
@@ -0,0 +1,100 @@
+package eu.dnetlib.dhp.common.java;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Handles parsing parameters passed to the {@link Process}
+ * @author Mateusz Kobos
+ *
+ */
+public class CmdLineParserForProcessRunParameters {
+ /** Parse the command line arguments.
+ *
+ * @param cmdLine command line arguments
+ * @param ports names of ports that ought to be extracted from command line
+ */
+ public ProcessParameters run(CommandLine cmdLine, Ports ports) {
+
+ Properties inputProperties = cmdLine.getOptionProperties(
+ CmdLineParser.inputPrefix);
+ assumePortNamesMatch(CmdLineParser.inputPrefix, inputProperties,
+ ports.getInput().keySet());
+ Map<String, Path> inputBindings = getBindings(
+ inputProperties, ports.getInput().keySet());
+
+ Properties outputProperties = cmdLine.getOptionProperties(
+ CmdLineParser.outputPrefix);
+ assumePortNamesMatch(CmdLineParser.outputPrefix, outputProperties,
+ ports.getOutput().keySet());
+ Map<String, Path> outputBindings = getBindings(
+ outputProperties, ports.getOutput().keySet());
+
+ PortBindings bindings = new PortBindings(inputBindings, outputBindings);
+
+ Properties specialProperties = cmdLine.getOptionProperties(
+ CmdLineParser.specialParametersPrefix);
+ assumeContainAllMandatoryParameters(
+ specialProperties, CmdLineParser.mandatorySpecialParameters);
+
+ Properties rawProperties = cmdLine.getOptionProperties(
+ CmdLineParser.processParametersPrefix);
+ Map<String, String> processParameters = new HashMap<String, String>();
+ for (Entry<Object, Object> entry : rawProperties.entrySet()) {
+ processParameters.put((String) entry.getKey(), (String) entry.getValue());
+ }
+
+ return new ProcessParameters(bindings, processParameters);
+ }
+
+ private static void assumeContainAllMandatoryParameters(Properties properties, String[] mandatoryParameters) {
+ for (String mandatoryParameter : mandatoryParameters) {
+ if (!properties.containsKey(mandatoryParameter)) {
+ throw new CmdLineParserException(String.format(
+ "Mandatory parameter \"-%s%s\" is missing", CmdLineParser.specialParametersPrefix, mandatoryParameter));
+ }
+ }
+ }
+
+ private static void assumePortNamesMatch(String cmdLineParamPrefix, Properties cmdLineProperties, Set<String> portNames) {
+ for (String portName : portNames) {
+ if (!cmdLineProperties.containsKey(portName)) {
+ throw new CmdLineParserException(String.format(
+ "The port \"%s\" is not bound in the command line (missing \"-%s%s\" option)", portName, cmdLineParamPrefix, portName));
+ }
+ }
+ for (Object key : cmdLineProperties.keySet()) {
+ if (!portNames.contains(key)) {
+ throw new CmdLineParserException(String.format(
+ "Unknown port name \"%s\" given in the command line", key));
+ }
+ }
+ }
+
+ private static Map<String, Path> getBindings(Properties cmdLineProperties, Set<String> portNames) {
+ Map<String, Path> bindings = new HashMap<String, Path>();
+ for (String portName : portNames) {
+ bindings.put(portName, new Path((String) cmdLineProperties.get(portName)));
+ }
+ return bindings;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java
new file mode 100644
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java
+package eu.dnetlib.dhp.common.string;
+
+import java.text.Normalizer;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Removes diacritics from text.
+ * <p>
+ * All the characters from the "Latin-1 Supplement" and "Latin Extended-A"
+ * Unicode blocks are mapped to the "Basic Latin" block. Characters from other
+ * alphabets are generally left intact, although the decomposable ones may be
+ * affected by the procedure.
+ *
+ * @author Lukasz Bolikowski (bolo@icm.edu.pl)
+ *
+ * @author Łukasz Dumiszewski /just copied from coansys-commons/
+ *
+ */
+public final class DiacriticsRemover {
+
+ private static final Character[] from = {
+ 'Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ',
+ 'ħ', 'ı', 'ĸ', 'Ł', 'ł', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ'};
+ private static final String[] to = {
+ "AE", "D", "O", "Y", "ss", "ae", "d", "o", "y", "D", "d", "H",
+ "h", "i", "q", "L", "l", "N", "n", "OE", "oe", "T", "t"};
+
+ private static Map<Character, String> lookup = buildLookup();
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+
+ private DiacriticsRemover() {}
+
+
+ //------------------------ LOGIC --------------------------
+
+
+ /**
+ * Removes diacritics from a text.
+ *
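+ * For example, "Łódź" becomes "Lodz" and "Æther" becomes "AEther".
+ *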
+ * @param text Text to process.
+ * @return Text without diacritics.
+ */
+ public static String removeDiacritics(String text) {
+ if (text == null) {
+ return null;
+ }
+
+ String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD);
+
+ StringBuilder builder = new StringBuilder();
+ for (int i = 0; i < tmp.length(); i++) {
+ Character ch = tmp.charAt(i);
+ if (Character.getType(ch) == Character.NON_SPACING_MARK) {
+ continue;
+ }
+
+ if (lookup.containsKey(ch)) {
+ builder.append(lookup.get(ch));
+ } else {
+ builder.append(ch);
+ }
+ }
+
+ return builder.toString();
+ }
+
+
+ //------------------------ PRIVATE --------------------------
+
+ private static Map<Character, String> buildLookup() {
+ if (from.length != to.length) {
+ throw new IllegalStateException();
+ }
+
+ Map<Character, String> _lookup = new HashMap<Character, String>();
+ for (int i = 0; i < from.length; i++) {
+ _lookup.put(from[i], to[i]);
+ }
+
+ return _lookup;
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java
new file mode 100644
index 0000000000..bae64ae38b
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java
@@ -0,0 +1,130 @@
+/*
+ * This file is part of CoAnSys project.
+ * Copyright (c) 2012-2015 ICM-UW
+ *
+ * CoAnSys is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * CoAnSys is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with CoAnSys. If not, see .
+ */
+package eu.dnetlib.dhp.common.string;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * An implementation of {@link StringNormalizer} that normalizes strings for non-strict comparisons
+ * in which one does not care about characters other than letters and digits or about differently written diacritics.
+ *
+ * @author Łukasz Dumiszewski
+ *
+ */
+public final class LenientComparisonStringNormalizer implements StringNormalizer, Serializable {
+
+
+ private static final long serialVersionUID = 1L;
+
+
+ private List<Character> whitelistCharacters;
+
+
+ //------------------------ CONSTRUCTORS --------------------------
+
+ public LenientComparisonStringNormalizer() {
+ this(ImmutableList.of());
+ }
+
+ /**
+ * @param whitelistCharacters - non alphanumeric characters that will not be removed
+ * during normalization
+ */
+ public LenientComparisonStringNormalizer(List<Character> whitelistCharacters) {
+ this.whitelistCharacters = whitelistCharacters;
+ }
+
+
+ //------------------------ LOGIC --------------------------
+
+
+
+ /**
+ * Normalizes the given value.
+ * The normalized strings are better suited for non-strict comparisons, in which one does NOT care about characters that are
+ * neither letters nor digits; about accidental spaces or different diacritics etc.
+ * This method:
+ * <ul>
+ * <li>Replaces all characters that are not letters or digits with spaces (except those on the whitelist characters list)</li>
+ * <li>Replaces white spaces with spaces</li>
+ * <li>Trims</li>
+ * <li>Compacts multi-space gaps to one-space gaps</li>
+ * <li>Removes diacritics</li>
+ * <li>Changes characters to lower case</li>
+ * </ul>
+ * Returns "" if the passed value is null or blank
+ *
+ * @param value the string to normalize
+ * @see DiacriticsRemover#removeDiacritics(String)
+ *
+ *
+ */
+ public String normalize(String value) {
+
+ if (StringUtils.isBlank(value)) {
+
+ return "";
+
+ }
+
+
+ String result = value;
+
+ result = DiacriticsRemover.removeDiacritics(result);
+
+ result = removeNonLetterDigitCharacters(result);
+
+ result = result.toLowerCase();
+
+ result = result.trim().replaceAll(" +", " ");
+
+ return result;
+ }
+
+
+
+
+ //------------------------ PRIVATE --------------------------
+
+
+ private String removeNonLetterDigitCharacters(final String value) {
+
+ StringBuilder sb = new StringBuilder();
+
+ for (int i = 0; i < value.length(); ++i) {
+
+ char c = value.charAt(i);
+
+ if (Character.isLetterOrDigit(c) || whitelistCharacters.contains(c)) {
+ sb.append(c);
+ } else {
+ sb.append(' ');
+ }
+ }
+
+ return sb.toString();
+ }
+
+
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java
new file mode 100644
index 0000000000..6e28422bc0
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java
@@ -0,0 +1,16 @@
+package eu.dnetlib.dhp.common.string;
+
+/**
+ * String normalizer.
+ *
+ * @author Łukasz Dumiszewski
+ *
+ */
+public interface StringNormalizer {
+
+ /**
+ * Normalizes the given string value.
+ */
+ String normalize(String value);
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java
new file mode 100644
index 0000000000..7fcc0506a0
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java
@@ -0,0 +1,45 @@
+package eu.dnetlib.dhp.common.utils;
+
+import java.lang.reflect.Type;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonDeserializer;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonParseException;
+
+/**
+ * Factory for gson object that supports serializing avro generated classes
+ *
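+ * A usage sketch, deserializing one of this project's avro generated classes
+ * from a JSON string named {@code json}:
+ * <pre>
+ * Gson gson = AvroGsonFactory.create();
+ * Fault fault = gson.fromJson(json, Fault.class);
+ * </pre>
+ *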
+ * @author madryk
+ *
+ */
+public final class AvroGsonFactory {
+
+ //------------------------ CONSTRUCTORS -------------------
+
+
+ private AvroGsonFactory() {}
+
+
+ //------------------------ LOGIC --------------------------
+
+ public static Gson create() {
+ GsonBuilder builder = new GsonBuilder();
+
+ builder.registerTypeAdapter(CharSequence.class, new CharSequenceDeserializer());
+
+ return builder.create();
+ }
+
+ public static class CharSequenceDeserializer implements JsonDeserializer<CharSequence> {
+
+ @Override
+ public CharSequence deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context)
+ throws JsonParseException {
+ return json.getAsString();
+ }
+
+ }
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java
new file mode 100644
index 0000000000..44dd218b5e
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java
@@ -0,0 +1,77 @@
+package eu.dnetlib.dhp.common.utils;
+
+import java.lang.reflect.Field;
+
+import org.apache.avro.Schema;
+
+/**
+ *
+ * @author Mateusz Kobos
+ *
+ */
+public final class AvroUtils {
+
+ public final static String primitiveTypePrefix = "org.apache.avro.Schema.Type.";
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+
+ private AvroUtils() {}
+
+
+ //------------------------ LOGIC --------------------------
+
+
+ /**
+ * For a given name of a class generated from Avro schema, return
+ * its Avro schema.
+ *
+ * Apart from a name of a class you can also give a name of one of enums
+ * defined in {@link org.apache.avro.Schema.Type}; in such case an
+ * appropriate primitive type will be returned.
+ *
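+ * For example, {@code toSchema("org.apache.avro.Schema.Type.STRING")} yields
+ * the same schema as {@code Schema.create(Schema.Type.STRING)}.
+ *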
+ * @param typeName fully qualified name of a class generated from Avro schema,
+ * e.g. {@code eu.dnetlib.dhp.common.avro.Person},
+ * or a fully qualified name of enum defined by
+ * {@link org.apache.avro.Schema.Type},
+ * e.g. {@link org.apache.avro.Schema.Type#STRING}.
+ * @return Avro schema for the given type name
+ */
+ public static Schema toSchema(String typeName) {
+ Schema schema = null;
+ if(typeName.startsWith(primitiveTypePrefix)){
+ String shortName = typeName.substring(
+ primitiveTypePrefix.length(), typeName.length());
+ schema = getPrimitiveTypeSchema(shortName);
+ } else {
+ schema = getAvroClassSchema(typeName);
+ }
+ return schema;
+ }
+
+ private static Schema getPrimitiveTypeSchema(String shortName){
+ Schema.Type type = Schema.Type.valueOf(shortName);
+ return Schema.create(type);
+ }
+
+ private static Schema getAvroClassSchema(String className){
+ try {
+ Class> avroClass = Class.forName(className);
+ Field f = avroClass.getDeclaredField("SCHEMA$");
+ return (Schema) f.get(null);
+ } catch (ClassNotFoundException e) {
+ throw new RuntimeException(
+ "Class \""+className+"\" does not exist", e);
+ } catch (SecurityException e) {
+ throw new RuntimeException(e);
+ } catch (NoSuchFieldException e) {
+ throw new RuntimeException(e);
+ } catch (IllegalArgumentException e) {
+ throw new RuntimeException(e);
+ } catch (IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java
new file mode 100644
index 0000000000..152271ab7c
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java
@@ -0,0 +1,45 @@
+package eu.dnetlib.dhp.common.utils;
+
+/**
+ * Byte array utility class.
+ * @author mhorst
+ *
+ */
+public final class ByteArrayUtils {
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ private ByteArrayUtils() {}
+
+ //------------------------ LOGIC --------------------------
+
+ /**
+ * Does this byte array begin with match array content?
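+ * For example, {@code startsWith("50|id".getBytes(), "50|".getBytes())} returns true.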
+ * @param source Byte array to examine
+ * @param match Byte array to locate in source
+ * @return true If the starting bytes are equal
+ */
+ public static boolean startsWith(byte[] source, byte[] match) {
+ return startsWith(source, 0, match);
+ }
+
+ /**
+ * Does this byte array begin with match array content?
+ * @param source Byte array to examine
+ * @param offset An offset into the source array
+ * @param match Byte array to locate in source
+ * @return true If the starting bytes are equal
+ */
+ public static boolean startsWith(byte[] source, int offset, byte[] match) {
+ if (match.length > (source.length - offset)) {
+ return false;
+ }
+ for (int i = 0; i < match.length; i++) {
+ if (source[offset + i] != match[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java
new file mode 100644
index 0000000000..1e6e041498
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java
@@ -0,0 +1,89 @@
+package eu.dnetlib.dhp.common.utils;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.InvalidParameterException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import eu.dnetlib.dhp.common.java.PortBindings;
+import eu.dnetlib.dhp.common.java.Ports;
+import eu.dnetlib.dhp.common.java.Process;
+import eu.dnetlib.dhp.common.java.io.CloseableIterator;
+import eu.dnetlib.dhp.common.java.io.DataStore;
+import eu.dnetlib.dhp.common.java.io.FileSystemPath;
+import eu.dnetlib.dhp.common.java.porttype.AnyPortType;
+import eu.dnetlib.dhp.common.java.porttype.PortType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME;
+
+/**
+ * Simple process verifying whether given datastore is empty.
+ * @author mhorst
+ *
+ */
+public class EmptyDatastoreVerifierProcess implements Process {
+
+ public static final String INPUT_PORT_NAME = "input";
+
+ public static final String DEFAULT_ENCODING = "UTF-8";
+
+ public static final String OUTPUT_PROPERTY_IS_EMPTY = "isEmpty";
+
+ /**
+ * Ports handled by this module.
+ */
+ private final Ports ports;
+
+
+ // ------------------------ CONSTRUCTORS --------------------------
+
+ public EmptyDatastoreVerifierProcess() {
+ // preparing ports
+ Map<String, PortType> input = new HashMap<String, PortType>();
+ input.put(INPUT_PORT_NAME, new AnyPortType());
+ Map<String, PortType> output = Collections.emptyMap();
+ ports = new Ports(input, output);
+ }
+
+ @Override
+ public Map<String, PortType> getInputPorts() {
+ return ports.getInput();
+ }
+
+ @Override
+ public Map<String, PortType> getOutputPorts() {
+ return ports.getOutput();
+ }
+
+ @Override
+ public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {
+ if (!portBindings.getInput().containsKey(INPUT_PORT_NAME)) {
+ throw new InvalidParameterException("missing input port!");
+ }
+
+ try (CloseableIterator<?> closeableIt = getIterator(conf, portBindings.getInput().get(INPUT_PORT_NAME))) {
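+ // write the outcome to the oozie action output properties file so that
+ // the workflow can pick it up via <capture-output/>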
+ File file = new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME));
+ Properties props = new Properties();
+ props.setProperty(OUTPUT_PROPERTY_IS_EMPTY, Boolean.toString(!closeableIt.hasNext()));
+ try (OutputStream os = new FileOutputStream(file)) {
+ props.store(os, "");
+ }
+ }
+ }
+
+ /**
+ * Returns iterator over datastore.
+ */
+ protected CloseableIterator<?> getIterator(Configuration conf, Path path) throws IOException {
+ return DataStore.getReader(new FileSystemPath(FileSystem.get(conf), path));
+ }
+
+}
diff --git a/dhp-schemas/README.md b/dhp-schemas/README.md
new file mode 100644
index 0000000000..473ad4cf19
--- /dev/null
+++ b/dhp-schemas/README.md
@@ -0,0 +1,3 @@
+Description of the project
+--------------------------
+This project defines **serialization schemas** of Avro data store files that are used to pass data between workflow nodes in the system.
diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml
new file mode 100644
index 0000000000..2c6e18f271
--- /dev/null
+++ b/dhp-schemas/pom.xml
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>dhp-schemas</artifactId>
+    <packaging>jar</packaging>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.avro</groupId>
+            <artifactId>avro</artifactId>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.avro</groupId>
+                <artifactId>avro-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>schema</goal>
+                            <goal>idl-protocol</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>add-source</id>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>add-source</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl
new file mode 100644
index 0000000000..3bce821a4b
--- /dev/null
+++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl
@@ -0,0 +1,29 @@
+@namespace("eu.dnetlib.dhp.audit.schemas")
+protocol DHP {
+
+ record Cause {
+// generic cause code, root exception class name when derived from exception
+ string code;
+// cause message
+ union { null , string } message = null;
+ }
+
+ record Fault {
+// input object identifier
+ string inputObjectId;
+// fault creation timestamp
+ long timestamp;
+// generic fault code, root exception class name when derived from exception
+ string code;
+// fault message
+ union { null , string } message = null;
+// stack trace
+ union { null , string } stackTrace = null;
+// fault causes, array is indexed with cause depth
+ union { null , array<Cause> } causes = null;
+// Other supplementary data related to specific type of fault.
+// See parameters description in oozie workflow.xml documentation of modules
+// that use this structure for information what exactly can be stored as supplementary data.
+ union { null , map<string> } supplementaryData = null;
+ }
+}
diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl
new file mode 100644
index 0000000000..99406b4f06
--- /dev/null
+++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl
@@ -0,0 +1,16 @@
+@namespace("eu.dnetlib.dhp.common.schemas")
+protocol DHP{
+
+ enum ReportEntryType {
+ COUNTER, DURATION
+ }
+
+
+ record ReportEntry {
+
+ string key;
+ ReportEntryType type;
+ string value;
+
+ }
+}
diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl
new file mode 100644
index 0000000000..9ad5435fac
--- /dev/null
+++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl
@@ -0,0 +1,21 @@
+@namespace("eu.dnetlib.dhp.importer.schemas")
+protocol DHP {
+
+ enum RecordFormat {
+ XML, JSON
+ }
+
+ record ImportedRecord {
+
+ // record identifier
+ string id;
+
+ RecordFormat format;
+
+ // format name (OAF, OAI_DC, Datacite, etc) for which there is a parser implementation
+ string formatName;
+
+ // record body
+ string body;
+ }
+}
diff --git a/dhp-wf/dhp-wf-import/pom.xml b/dhp-wf/dhp-wf-import/pom.xml
new file mode 100644
index 0000000000..6bf4ba8250
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/pom.xml
@@ -0,0 +1,105 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <groupId>eu.dnetlib.dhp</groupId>
+        <artifactId>dhp-wf</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>dhp-wf-import</artifactId>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>dhp-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>dhp-common</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>${project.groupId}</groupId>
+            <artifactId>dhp-schemas</artifactId>
+            <version>${project.version}</version>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-mapreduce-client-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-common</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.googlecode.json-simple</groupId>
+            <artifactId>json-simple</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>commons-cli</groupId>
+            <artifactId>commons-cli</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>eu.dnetlib</groupId>
+            <artifactId>dnet-objectstore-rmi</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>eu.dnetlib</groupId>
+            <artifactId>cnr-rmi-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>eu.dnetlib</groupId>
+            <artifactId>cnr-resultset-client</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>eu.dnetlib</groupId>
+            <artifactId>dnet-openaireplus-mapping-utils</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.springframework</groupId>
+            <artifactId>spring-context</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.cxf</groupId>
+            <artifactId>cxf-rt-frontend-jaxws</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.google.code.gson</groupId>
+            <artifactId>gson</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-core_2.10</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_2.10</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.databricks</groupId>
+            <artifactId>spark-avro_2.10</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.mongodb.spark</groupId>
+            <artifactId>mongo-spark-connector_2.10</artifactId>
+        </dependency>
+
+    </dependencies>
+
+</project>
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java
new file mode 100644
index 0000000000..214d6691dd
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java
@@ -0,0 +1,29 @@
+package eu.dnetlib.dhp.wf.importer;
+
+import java.io.IOException;
+
+import org.apache.avro.file.DataFileWriter;
+
+/**
+ * {@link DataFileWriter} based record receiver.
+ * @author mhorst
+ *
+ */
+public class DataFileRecordReceiver<T> implements RecordReceiver<T> {
+
+ private final DataFileWriter<T> writer;
+
+ /**
+ * Default constructor.
+ * @param writer writer of received records
+ */
+ public DataFileRecordReceiver(DataFileWriter<T> writer) {
+ this.writer = writer;
+ }
+
+ @Override
+ public void receive(T object) throws IOException {
+ this.writer.append(object);
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java
new file mode 100644
index 0000000000..955f18065c
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java
@@ -0,0 +1,50 @@
+package eu.dnetlib.dhp.wf.importer;
+
+import java.io.IOException;
+
+import org.apache.avro.file.DataFileWriter;
+
+/**
+ * {@link DataFileWriter} based record receiver with counter of
+ * received records.
+ *
+ * @author madryk
+ */
+public class DataFileRecordReceiverWithCounter<T> extends DataFileRecordReceiver<T> {
+
+ private long receivedCount = 0L;
+
+
+ //------------------------ CONSTRUCTORS --------------------------
+
+ /**
+ * Default constructor
+ *
+ * @param writer - writer of the received records
+ */
+ public DataFileRecordReceiverWithCounter(DataFileWriter<T> writer) {
+ super(writer);
+ }
+
+
+ //------------------------ GETTERS --------------------------
+
+ /**
+ * Returns number of received records
+ */
+ public long getReceivedCount() {
+ return receivedCount;
+ }
+
+
+ //------------------------ LOGIC --------------------------
+
+ /**
+ * Receives passed record and increments the counter.
+ */
+ @Override
+ public void receive(T record) throws IOException {
+ super.receive(record);
+ ++receivedCount;
+ }
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java
new file mode 100644
index 0000000000..40f673ee08
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java
@@ -0,0 +1,52 @@
+package eu.dnetlib.dhp.wf.importer;
+
+/**
+ * Import related workflow parameters.
+ * @author mhorst
+ *
+ */
+public final class ImportWorkflowRuntimeParameters {
+
+ // parameter names
+
+ public static final String IMPORT_INFERENCE_PROVENANCE_BLACKLIST = "import.inference.provenance.blacklist";
+ public static final String IMPORT_SKIP_DELETED_BY_INFERENCE = "import.skip.deleted.by.inference";
+ public static final String IMPORT_TRUST_LEVEL_THRESHOLD = "import.trust.level.threshold";
+ public static final String IMPORT_APPROVED_DATASOURCES_CSV = "import.approved.datasources.csv";
+ public static final String IMPORT_APPROVED_COLUMNFAMILIES_CSV = "import.approved.columnfamilies.csv";
+ public static final String IMPORT_MERGE_BODY_WITH_UPDATES = "import.merge.body.with.updates";
+ public static final String IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV = "import.content.approved.objectstores.csv";
+ public static final String IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV = "import.content.blacklisted.objectstores.csv";
+
+ public static final String IMPORT_CONTENT_OBJECT_STORE_LOC = "import.content.object.store.location";
+ public static final String IMPORT_CONTENT_OBJECT_STORE_IDS_CSV = "import.content.object.store.ids.csv";
+ public static final String IMPORT_CONTENT_MAX_FILE_SIZE_MB = "import.content.max.file.size.mb";
+ public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout";
+ public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout";
+
+ public static final String IMPORT_MDSTORE_IDS_CSV = "import.mdstore.ids.csv";
+ public static final String IMPORT_MDSTORE_SERVICE_LOCATION = "import.mdstore.service.location";
+ public static final String IMPORT_MDSTORE_RECORD_MAXLENGTH = "import.mdstore.record.maxlength";
+
+ public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location";
+ public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code";
+ public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = "import.vocabulary.output.filename";
+
+ public static final String IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT = "import.resultset.client.read.timeout";
+ public static final String IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT = "import.resultset.client.connection.timeout";
+ public static final String IMPORT_RESULT_SET_PAGESIZE = "import.resultset.pagesize";
+
+
+ public static final String HBASE_ENCODING = "hbase.table.encoding";
+
+ public static final String IMPORT_FACADE_FACTORY_CLASS = "import.facade.factory.classname";
+
+ // default values
+
+ public static final String RESULTSET_READ_TIMEOUT_DEFAULT_VALUE = "60000";
+ public static final String RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000";
+ public static final String RESULTSET_PAGESIZE_DEFAULT_VALUE = "100";
+
+ private ImportWorkflowRuntimeParameters() {}
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java
new file mode 100644
index 0000000000..c0a5e89507
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java
@@ -0,0 +1,14 @@
+package eu.dnetlib.dhp.wf.importer;
+
+import java.io.IOException;
+
+/**
+ * Record receiver interface.
+ * @author mhorst
+ *
+ * @param <T> type of received records
+ */
+public interface RecordReceiver<T> {
+
+ void receive(T object) throws IOException;
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java
new file mode 100644
index 0000000000..0a3cd6fb4c
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java
@@ -0,0 +1,104 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import java.util.Map;
+
+import javax.xml.ws.BindingProvider;
+import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder;
+
+import org.apache.log4j.Logger;
+
+import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl;
+
+/**
+ * Abstract class utilized by all WebService facades.
+ * @author mhorst
+ *
+ */
+public abstract class AbstractResultSetAwareWebServiceFacade<T> {
+
+ private final Logger log = Logger.getLogger(this.getClass());
+
+ /**
+ * Web service.
+ */
+ private final T service;
+
+ /**
+ * ResultSet read timeout.
+ */
+ private final long resultSetReadTimeout;
+
+ /**
+ * ResultSet connection timeout.
+ */
+ private final long resultSetConnectionTimeout;
+
+ /**
+ * ResultSet page size.
+ */
+ private final int resultSetPageSize;
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ /**
+ * Instantiates underlying service.
+ * @param clazz webservice class
+ * @param serviceLocation webservice location
+ * @param serviceReadTimeout service read timeout
+ * @param serviceConnectionTimeout service connection timeout
+ * @param resultSetReadTimeout resultset read timeout
+ * @param resultSetConnectionTimeout resultset connection timeout
+ * @param resultSetPageSize resultset page size
+ */
+ protected AbstractResultSetAwareWebServiceFacade(Class<T> clazz, String serviceLocation,
+ long serviceReadTimeout, long serviceConnectionTimeout,
+ long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
+ W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder();
+ eprBuilder.address(serviceLocation);
+ this.service = new JaxwsServiceResolverImpl().getService(clazz, eprBuilder.build());
+ if (this.service instanceof BindingProvider) {
+ log.info(String.format("setting timeouts for %s: read timeout (%s) and connect timeout (%s)",
+ BindingProvider.class, serviceReadTimeout, serviceConnectionTimeout));
+ final Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
+
+ // can't be sure about which will be used. Set them all.
+ requestContext.put("com.sun.xml.internal.ws.request.timeout", serviceReadTimeout);
+ requestContext.put("com.sun.xml.internal.ws.connect.timeout", serviceConnectionTimeout);
+
+ requestContext.put("com.sun.xml.ws.request.timeout", serviceReadTimeout);
+ requestContext.put("com.sun.xml.ws.connect.timeout", serviceConnectionTimeout);
+
+ requestContext.put("javax.xml.ws.client.receiveTimeout", serviceReadTimeout);
+ requestContext.put("javax.xml.ws.client.connectionTimeout", serviceConnectionTimeout);
+ }
+
+ this.resultSetReadTimeout = resultSetReadTimeout;
+ this.resultSetConnectionTimeout = resultSetConnectionTimeout;
+ this.resultSetPageSize = resultSetPageSize;
+ }
+
+
+ //------------------------ GETTERS -------------------------
+
+ public T getService() {
+ return service;
+ }
+
+
+ public long getResultSetReadTimeout() {
+ return resultSetReadTimeout;
+ }
+
+
+ public long getResultSetConnectionTimeout() {
+ return resultSetConnectionTimeout;
+ }
+
+
+ public int getResultSetPageSize() {
+ return resultSetPageSize;
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java
new file mode 100644
index 0000000000..c156ae1cc8
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java
@@ -0,0 +1,17 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+/**
+ * ISLookup service facade.
+ *
+ * @author mhorst
+ *
+ */
+public interface ISLookupFacade {
+
+ /**
+ * Provides all profiles matching given query
+ * @param xPathQuery XPath query
+ */
+ Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException;
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java
new file mode 100644
index 0000000000..f50b02b980
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java
@@ -0,0 +1,17 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+/**
+ * MDStore service facade.
+ *
+ * @author mhorst
+ *
+ */
+public interface MDStoreFacade {
+
+ /**
+ * Delivers all records for given MDStore identifier
+ * @param mdStoreId MDStore identifier
+ */
+ Iterable<String> deliverMDRecords(String mdStoreId) throws ServiceFacadeException;
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java
new file mode 100644
index 0000000000..0e1aa19ef9
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java
@@ -0,0 +1,19 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+/**
+ * ObjectStore service facade.
+ *
+ * @author mhorst
+ *
+ */
+public interface ObjectStoreFacade {
+
+ /**
+ * Returns metadata records from given objectstore created in specified time range.
+ * @param objectStoreId object store identifier
+ * @param from from time in millis
+ * @param until until time in millis
+ */
+ Iterable<String> deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException;
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java
new file mode 100644
index 0000000000..9776306fa2
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java
@@ -0,0 +1,27 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+/**
+ * Service facade generic exception.
+ *
+ * @author mhorst
+ *
+ */
+public class ServiceFacadeException extends Exception {
+
+ private static final long serialVersionUID = 0L;
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ public ServiceFacadeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public ServiceFacadeException(String message) {
+ super(message);
+ }
+
+ public ServiceFacadeException(Throwable cause) {
+ super(cause);
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java
new file mode 100644
index 0000000000..94b9307c47
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java
@@ -0,0 +1,20 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import java.util.Map;
+
+/**
+ * Generic service facade factory. All implementations must be instantiable with no-argument constructor.
+ *
+ * @author mhorst
+ *
+ */
+public interface ServiceFacadeFactory<T> {
+
+ /**
+ * Creates service of given type configured with parameters.
+ *
+ * @param parameters service configuration
+ *
+ */
+ T instantiate(Map<String, String> parameters);
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java
new file mode 100644
index 0000000000..53a76d761d
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java
@@ -0,0 +1,80 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_FACADE_FACTORY_CLASS;
+
+import java.lang.reflect.Constructor;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+
+import com.google.common.collect.ImmutableMap;
+
+import eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters;
+
+/**
+ * Service facade utility methods.
+ * @author mhorst
+ *
+ */
+public final class ServiceFacadeUtils {
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ private ServiceFacadeUtils() {}
+
+ //------------------------ LOGIC --------------------------
+
+ /**
+ * Instantiates service based on provided parameters.
+ *
+ * Service factory class name is mandatory and has to be provided as the {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} parameter.
+ * Other parameters will be used by the factory itself. The factory must be instantiable with a no-argument constructor.
+ *
+ * @param parameters set of parameters required for service instantiation
+ *
+ */
+ public static <T> T instantiate(Map<String, String> parameters) throws ServiceFacadeException {
+ String serviceFactoryClassName = parameters.get(IMPORT_FACADE_FACTORY_CLASS);
+ if (StringUtils.isBlank(serviceFactoryClassName)) {
+ throw new ServiceFacadeException("unknown service facade factory, no " + IMPORT_FACADE_FACTORY_CLASS + " parameter provided!");
+ }
+ try {
+ Class<?> clazz = Class.forName(serviceFactoryClassName);
+ Constructor<?> constructor = clazz.getConstructor();
+ @SuppressWarnings("unchecked")
+ ServiceFacadeFactory<T> serviceFactory = (ServiceFacadeFactory<T>) constructor.newInstance();
+ return serviceFactory.instantiate(parameters);
+ } catch (Exception e) {
+ throw new ServiceFacadeException("exception occurred while instantiating service by facade factory: " + serviceFactoryClassName, e);
+ }
+
+ }
+
+ /**
+ * Instantiates service based on provided configuration.
+ *
+ * Service factory class name is mandatory and has to be provided as the {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} configuration entry.
+ * Other parameters will be used by the factory itself. The factory must be instantiable with a no-argument constructor.
+ *
+ * @param config set of configuration entries required for service instantiation
+ */
+ public static <T> T instantiate(Configuration config) throws ServiceFacadeException {
+ return instantiate(buildParameters(config));
+ }
+
+
+ // ------------------------ PRIVATE --------------------------
+
+ /**
+ * Converts configuration entries into plain map.
+ */
+ private static Map<String, String> buildParameters(Configuration config) {
+ ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
+ for (Map.Entry<String, String> entry : config) {
+ builder.put(entry);
+ }
+ return builder.build();
+ }
+
+}
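
A hedged usage sketch: wiring a facade purely through the parameters map, the same way the Oozie workflow later in this patch passes its -P arguments (the service URL and MDStore id below are placeholders):

    package eu.dnetlib.dhp.wf.importer.facade;

    import java.util.HashMap;
    import java.util.Map;

    public class ServiceFacadeUtilsExample {

        public static void main(String[] args) throws ServiceFacadeException {
            Map<String, String> parameters = new HashMap<String, String>();
            // the factory class name is the only mandatory entry...
            parameters.put("import.facade.factory.classname",
                    "eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory");
            // ...remaining entries are interpreted by the chosen factory;
            // omitted timeouts and page size fall back to the documented defaults
            parameters.put("import.mdstore.service.location", "http://localhost:8280/services/mdStore");

            MDStoreFacade facade = ServiceFacadeUtils.instantiate(parameters);
            for (String record : facade.deliverMDRecords("some-mdstore-id")) {
                System.out.println(record);
            }
        }
    }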
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java
new file mode 100644
index 0000000000..7c787f2f8c
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java
@@ -0,0 +1,55 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import java.util.Collections;
+
+import org.apache.log4j.Logger;
+
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
+import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
+
+/**
+ * WebService based ISLookup facade.
+ *
+ * @author mhorst
+ *
+ */
+public class WebServiceISLookupFacade extends AbstractResultSetAwareWebServiceFacade<ISLookUpService> implements ISLookupFacade {
+
+ private static final Logger log = Logger.getLogger(WebServiceISLookupFacade.class);
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ /**
+ * @param serviceLocation ISLookup service location
+ * @param serviceReadTimeout service read timeout
+ * @param serviceConnectionTimeout service connection timeout
+ * @param resultSetReadTimeout read timeout of the result set delivering lookup results
+ * @param resultSetConnectionTimeout result set connection timeout
+ * @param resultSetPageSize result set data chunk size
+ */
+ public WebServiceISLookupFacade(String serviceLocation,
+ long serviceReadTimeout, long serviceConnectionTimeout,
+ long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
+ super(ISLookUpService.class, serviceLocation,
+ serviceReadTimeout, serviceConnectionTimeout,
+ resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize);
+ }
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException {
+ try {
+ return getService().quickSearchProfile(xPathQuery);
+ } catch (ISLookUpDocumentNotFoundException e) {
+ log.error("unable to find profile for query: " + xPathQuery, e);
+ return Collections.emptyList();
+ } catch (ISLookUpException e) {
+ throw new ServiceFacadeException("searching profiles in ISLookup failed with query '" + xPathQuery + "'", e);
+ }
+
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java
new file mode 100644
index 0000000000..6557ead948
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java
@@ -0,0 +1,45 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE;
+
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+import eu.dnetlib.dhp.common.WorkflowRuntimeParameters;
+
+/**
+ * WebService ISLookup service facade factory.
+ *
+ * @author mhorst
+ *
+ */
+public class WebServiceISLookupFacadeFactory implements ServiceFacadeFactory<ISLookupFacade> {
+
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public ISLookupFacade instantiate(Map parameters) {
+ Preconditions.checkArgument(parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION),
+ "unknown ISLookup service location: no parameter provided: '%s'", IMPORT_ISLOOKUP_SERVICE_LOCATION);
+
+ return new WebServiceISLookupFacade(parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters)));
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java
new file mode 100644
index 0000000000..d37d020ed9
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java
@@ -0,0 +1,52 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import javax.xml.ws.wsaddressing.W3CEndpointReference;
+
+import eu.dnetlib.data.mdstore.MDStoreService;
+import eu.dnetlib.data.mdstore.MDStoreServiceException;
+import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
+import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl;
+
+/**
+ * WebService based MDStore facade.
+ *
+ * @author mhorst
+ *
+ */
+public class WebServiceMDStoreFacade extends AbstractResultSetAwareWebServiceFacade<MDStoreService> implements MDStoreFacade {
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ /**
+ * @param serviceLocation MDStore webservice location
+ * @param serviceReadTimeout service read timeout
+ * @param serviceConnectionTimeout service connection timeout
+ * @param resultSetReadTimeout resultset read timeout
+ * @param resultSetConnectionTimeout result set connection timeout
+ * @param resultSetPageSize resultset page size
+ */
+ public WebServiceMDStoreFacade(String serviceLocation,
+ long serviceReadTimeout, long serviceConnectionTimeout,
+ long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
+ super(MDStoreService.class, serviceLocation,
+ serviceReadTimeout, serviceConnectionTimeout,
+ resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize);
+ }
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public Iterable<String> deliverMDRecords(String mdStoreId) throws ServiceFacadeException {
+ try {
+ W3CEndpointReference eprResult = getService().deliverMDRecords(mdStoreId, null, null, null);
+ ResultSetClientFactory rsFactory = new ResultSetClientFactory(
+ getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout());
+ rsFactory.setServiceResolver(new JaxwsServiceResolverImpl());
+ return rsFactory.getClient(eprResult);
+ } catch (MDStoreServiceException e) {
+ throw new ServiceFacadeException("delivering records for md store " + mdStoreId + " failed!", e);
+ }
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java
new file mode 100644
index 0000000000..00bb0c3f74
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java
@@ -0,0 +1,45 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_SERVICE_LOCATION;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE;
+
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+import eu.dnetlib.dhp.common.WorkflowRuntimeParameters;
+
+/**
+ * WebService MDStore service facade factory.
+ *
+ * @author mhorst
+ *
+ */
+public class WebServiceMDStoreFacadeFactory implements ServiceFacadeFactory<WebServiceMDStoreFacade> {
+
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public WebServiceMDStoreFacade instantiate(Map parameters) {
+ Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_SERVICE_LOCATION),
+ "unknown MDStore service location: no parameter provided: '%s'", IMPORT_MDSTORE_SERVICE_LOCATION);
+
+ return new WebServiceMDStoreFacade(parameters.get(IMPORT_MDSTORE_SERVICE_LOCATION),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters)));
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java
new file mode 100644
index 0000000000..6e1aee80b5
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java
@@ -0,0 +1,52 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import javax.xml.ws.wsaddressing.W3CEndpointReference;
+
+import eu.dnetlib.data.objectstore.rmi.ObjectStoreService;
+import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException;
+import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
+import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl;
+
+/**
+ * WebService based ObjectStore facade.
+ *
+ * @author mhorst
+ *
+ */
+public class WebServiceObjectStoreFacade extends AbstractResultSetAwareWebServiceFacade<ObjectStoreService> implements ObjectStoreFacade {
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ /**
+ * @param serviceLocation ObjectStore webservice location
+ * @param serviceReadTimeout service read timeout
+ * @param serviceConnectionTimeout service connection timeout
+ * @param resultSetReadTimeout resultset read timeout
+ * @param resultSetConnectionTimeout result set connection timeout
+ * @param resultSetPageSize resultset page size
+ */
+ public WebServiceObjectStoreFacade(String serviceLocation,
+ long serviceReadTimeout, long serviceConnectionTimeout,
+ long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
+ super(ObjectStoreService.class, serviceLocation,
+ serviceReadTimeout, serviceConnectionTimeout,
+ resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize);
+ }
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public Iterable<String> deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException {
+ try {
+ W3CEndpointReference eprResult = getService().deliverObjects(objectStoreId, from, until);
+ ResultSetClientFactory rsFactory = new ResultSetClientFactory(
+ getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout());
+ rsFactory.setServiceResolver(new JaxwsServiceResolverImpl());
+ return rsFactory.getClient(eprResult);
+ } catch (ObjectStoreServiceException e) {
+ throw new ServiceFacadeException("delivering records for object store " + objectStoreId + " failed!", e);
+ }
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java
new file mode 100644
index 0000000000..9c77c45464
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java
@@ -0,0 +1,44 @@
+package eu.dnetlib.dhp.wf.importer.facade;
+
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE;
+
+import java.util.Map;
+
+import com.google.common.base.Preconditions;
+
+import eu.dnetlib.dhp.common.WorkflowRuntimeParameters;
+
+/**
+ * WebService ObjectStore facade factory.
+ *
+ * @author mhorst
+ *
+ */
+public class WebServiceObjectStoreFacadeFactory implements ServiceFacadeFactory<WebServiceObjectStoreFacade> {
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public WebServiceObjectStoreFacade instantiate(Map parameters) {
+ Preconditions.checkArgument(parameters.containsKey(IMPORT_CONTENT_OBJECT_STORE_LOC),
+ "unknown object store service location: no parameter provided: '%s'", IMPORT_CONTENT_OBJECT_STORE_LOC);
+
+ return new WebServiceObjectStoreFacade(parameters.get(IMPORT_CONTENT_OBJECT_STORE_LOC),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)),
+ Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters)));
+ }
+
+}
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java
new file mode 100644
index 0000000000..5d61f06a5c
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java
@@ -0,0 +1,94 @@
+package eu.dnetlib.dhp.wf.importer.mdrecord;
+
+import java.util.Stack;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import eu.dnetlib.dhp.common.InfoSpaceConstants;
+
+/**
+ * MDRecord handler extracting the record identifier from the header/objIdentifier element.
+ *
+ * The handler is stateful; {@link #startDocument()} resets it, so a single instance may be reused across documents.
+ * @author mhorst
+ *
+ */
+public class MDRecordHandler extends DefaultHandler {
+
+ public static final String ELEM_OBJ_IDENTIFIER = "objIdentifier";
+
+ private static final String ELEM_HEADER = "header";
+
+ private Stack<String> parents;
+
+ private StringBuilder currentValue = new StringBuilder();
+
+ private String recordId;
+
+
+ // ------------------------ LOGIC --------------------------
+
+ @Override
+ public void startDocument() throws SAXException {
+ parents = new Stack<String>();
+ recordId = null;
+ }
+
+ @Override
+ public void startElement(String uri, String localName, String qName,
+ Attributes attributes) throws SAXException {
+ if (this.recordId == null) {
+ if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) {
+ // start buffering the identifier value
+ this.currentValue = new StringBuilder();
+ }
+ this.parents.push(localName);
+ }
+ }
+
+ @Override
+ public void endElement(String uri, String localName, String qName)
+ throws SAXException {
+ if (this.recordId == null) {
+ this.parents.pop();
+ if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) {
+ this.recordId = InfoSpaceConstants.ROW_PREFIX_RESULT + this.currentValue.toString().trim();
+ }
+ // resetting current value
+ this.currentValue = null;
+ }
+ }
+
+ @Override
+ public void endDocument() throws SAXException {
+ parents.clear();
+ parents = null;
+ }
+
+ @Override
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ if (this.currentValue != null) {
+ this.currentValue.append(ch, start, length);
+ }
+ }
+
+ /**
+ * @return record identifier
+ */
+ public String getRecordId() {
+ return recordId;
+ }
+
+ // ------------------------ PRIVATE --------------------------
+
+ private boolean isWithinElement(String localName, String expectedElement, String expectedParent) {
+ return localName.equals(expectedElement) && !this.parents.isEmpty() &&
+ expectedParent.equals(this.parents.peek());
+ }
+
+
+}
+
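A minimal usage sketch mirroring how MDStoreRecordsImporter drives this handler (the XML record below is illustrative only; matching on localName requires a namespace-aware parser):

    package eu.dnetlib.dhp.wf.importer.mdrecord;

    import java.io.StringReader;

    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;

    import org.xml.sax.InputSource;

    public class MDRecordHandlerExample {

        public static void main(String[] args) throws Exception {
            String mdRecord = "<record><header>"
                    + "<objIdentifier>some-record-id</objIdentifier>"
                    + "</header><metadata/></record>";

            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
            parserFactory.setNamespaceAware(true);
            SAXParser saxParser = parserFactory.newSAXParser();

            MDRecordHandler handler = new MDRecordHandler();
            saxParser.parse(new InputSource(new StringReader(mdRecord)), handler);
            // the identifier comes back prefixed with InfoSpaceConstants.ROW_PREFIX_RESULT
            System.out.println(handler.getRecordId());
        }
    }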
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java
new file mode 100644
index 0000000000..4610938514
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java
@@ -0,0 +1,157 @@
+package eu.dnetlib.dhp.wf.importer.mdrecord;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import com.google.common.base.Preconditions;
+import eu.dnetlib.dhp.common.WorkflowRuntimeParameters;
+import eu.dnetlib.dhp.common.counter.NamedCounters;
+import eu.dnetlib.dhp.common.counter.NamedCountersFileWriter;
+import eu.dnetlib.dhp.common.java.PortBindings;
+import eu.dnetlib.dhp.common.java.Process;
+import eu.dnetlib.dhp.common.java.io.DataStore;
+import eu.dnetlib.dhp.common.java.io.FileSystemPath;
+import eu.dnetlib.dhp.common.java.porttype.AvroPortType;
+import eu.dnetlib.dhp.common.java.porttype.PortType;
+import eu.dnetlib.dhp.importer.schemas.ImportedRecord;
+import eu.dnetlib.dhp.importer.schemas.RecordFormat;
+import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade;
+import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeUtils;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.log4j.Logger;
+import org.xml.sax.InputSource;
+
+import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_IDS_CSV;
+import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_RECORD_MAXLENGTH;
+
+/**
+ * {@link MDStoreFacade} based metadata records importer.
+ * @author mhorst
+ *
+ */
+public class MDStoreRecordsImporter implements Process {
+
+ protected static final String COUNTER_NAME_TOTAL = "TOTAL";
+
+ protected static final String COUNTER_NAME_SIZE_EXCEEDED = "SIZE_EXCEEDED";
+
+ protected static final String PORT_OUT_MDRECORDS = "mdrecords";
+
+ private static final Logger log = Logger.getLogger(MDStoreRecordsImporter.class);
+
+ private static final int progressLogInterval = 100000;
+
+ private final NamedCountersFileWriter countersWriter = new NamedCountersFileWriter();
+
+ private final Map<String, PortType> outputPorts = new HashMap<String, PortType>();
+
+
+ //------------------------ CONSTRUCTORS -------------------
+
+ public MDStoreRecordsImporter() {
+ outputPorts.put(PORT_OUT_MDRECORDS, new AvroPortType(ImportedRecord.SCHEMA$));
+ }
+
+ //------------------------ LOGIC --------------------------
+
+ @Override
+ public Map<String, PortType> getInputPorts() {
+ return Collections.emptyMap();
+ }
+
+ @Override
+ public Map<String, PortType> getOutputPorts() {
+ return outputPorts;
+ }
+
+ @Override
+ public void run(PortBindings portBindings, Configuration conf,
+ Map<String, String> parameters) throws Exception {
+
+ Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_IDS_CSV),
+ "unknown mdstore identifier, required parameter '%s' is missing!", IMPORT_MDSTORE_IDS_CSV);
+ String mdStoreIdsCSV = parameters.get(IMPORT_MDSTORE_IDS_CSV);
+ int recordMaxLength = parameters.containsKey(IMPORT_MDSTORE_RECORD_MAXLENGTH) ?
+ Integer.parseInt(parameters.get(IMPORT_MDSTORE_RECORD_MAXLENGTH)) : Integer.MAX_VALUE;
+
+ NamedCounters counters = new NamedCounters(new String[] { COUNTER_NAME_TOTAL, COUNTER_NAME_SIZE_EXCEEDED });
+
+ if (StringUtils.isNotBlank(mdStoreIdsCSV) && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(mdStoreIdsCSV)) {
+
+ String[] mdStoreIds = StringUtils.split(mdStoreIdsCSV, WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER);
+
+ try (DataFileWriter<ImportedRecord> recordWriter = getWriter(FileSystem.get(conf), portBindings)) {
+
+ MDStoreFacade mdStoreFacade = ServiceFacadeUtils.instantiate(parameters);
+
+ SAXParserFactory parserFactory = SAXParserFactory.newInstance();
+ parserFactory.setNamespaceAware(true);
+ SAXParser saxParser = parserFactory.newSAXParser();
+ MDRecordHandler mdRecordHandler = new MDRecordHandler();
+
+ long startTime = System.currentTimeMillis();
+ int currentCount = 0;
+
+ for (String mdStoreId : mdStoreIds) {
+ for (String mdRecord : mdStoreFacade.deliverMDRecords(mdStoreId)) {
+ if (!StringUtils.isEmpty(mdRecord)) {
+ if (mdRecord.length() <= recordMaxLength) {
+ saxParser.parse(new InputSource(new StringReader(mdRecord)), mdRecordHandler);
+ String recordId = mdRecordHandler.getRecordId();
+ if (StringUtils.isNotBlank(recordId)) {
+ recordWriter.append(
+ ImportedRecord.newBuilder()
+ .setId(recordId)
+ .setBody(mdRecord)
+ .setFormat(RecordFormat.XML)
+ .build());
+ counters.increment(COUNTER_NAME_TOTAL);
+ } else {
+ log.error("skipping, unable to extract identifier from record: " + mdRecord);
+ }
+ } else {
+ counters.increment(COUNTER_NAME_SIZE_EXCEEDED);
+ log.error("mdstore record maximum length (" + recordMaxLength + "): was exceeded: "
+ + mdRecord.length() + ", record content:\n" + mdRecord);
+ }
+
+ } else {
+ log.error("got empty metadata record from mdstore: " + mdStoreId);
+ }
+ currentCount++;
+ if (currentCount % progressLogInterval == 0) {
+ log.info("current progress: " + currentCount + ", last package of " + progressLogInterval
+ + " processed in " + ((System.currentTimeMillis() - startTime) / 1000) + " secs");
+ startTime = System.currentTimeMillis();
+ }
+ }
+ }
+ log.info("total number of processed records: " + currentCount);
+ }
+ }
+
+ if (counters.currentValue(COUNTER_NAME_TOTAL) == 0) {
+ log.warn("parsed 0 metadata records from mdstores: " + mdStoreIdsCSV);
+ }
+ countersWriter.writeCounters(counters, System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME));
+
+ }
+
+ /**
+ * Provides {@link ImportedRecord} writer consuming records.
+ */
+ protected DataFileWriter<ImportedRecord> getWriter(FileSystem fs, PortBindings portBindings) throws IOException {
+ return DataStore.create(
+ new FileSystemPath(fs, portBindings.getOutput().get(PORT_OUT_MDRECORDS)), ImportedRecord.SCHEMA$);
+ }
+
+}
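
Since getWriter is the importer's only HDFS touchpoint, a test can subclass it and capture output in memory; the following is a sketch under the assumption that the Avro-generated ImportedRecord class is on the classpath (InMemoryMDStoreRecordsImporter is a hypothetical test double, not part of this patch):

    package eu.dnetlib.dhp.wf.importer.mdrecord;

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.specific.SpecificDatumWriter;
    import org.apache.hadoop.fs.FileSystem;

    import eu.dnetlib.dhp.common.java.PortBindings;
    import eu.dnetlib.dhp.importer.schemas.ImportedRecord;

    // test double keeping serialized records in memory instead of writing to HDFS
    public class InMemoryMDStoreRecordsImporter extends MDStoreRecordsImporter {

        private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

        @Override
        protected DataFileWriter<ImportedRecord> getWriter(FileSystem fs, PortBindings portBindings)
                throws IOException {
            DataFileWriter<ImportedRecord> writer = new DataFileWriter<ImportedRecord>(
                    new SpecificDatumWriter<ImportedRecord>(ImportedRecord.class));
            return writer.create(ImportedRecord.SCHEMA$, buffer);
        }

        public byte[] getSerializedRecords() {
            return buffer.toByteArray();
        }
    }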
diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java
new file mode 100644
index 0000000000..4776af22b0
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java
@@ -0,0 +1,48 @@
+package eu.dnetlib.dhp.wf.importer.mdrecord;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import eu.dnetlib.dhp.common.java.PortBindings;
+import eu.dnetlib.dhp.common.java.Process;
+import eu.dnetlib.dhp.common.java.porttype.PortType;
+import org.apache.hadoop.conf.Configuration;
+
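+/**
+ * Work-in-progress importer intended to read records through the MongoDB Spark connector;
+ * the commented-out block in run() sketches the intended wiring.
+ */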
+public class MongoRecordImporter implements Process {
+
+ private final Map<String, PortType> outputPorts = new HashMap<String, PortType>();
+
+ @Override
+ public Map<String, PortType> getInputPorts() {
+ return Collections.emptyMap();
+ }
+
+ @Override
+ public Map<String, PortType> getOutputPorts() {
+ return outputPorts;
+ }
+
+ @Override
+ public void run(final PortBindings portBindings, final Configuration conf, final Map<String, String> parameters) throws Exception {
+
+ /*
+ SparkSession spark = SparkSession.builder()
+ .master("local")
+ .appName("MongoSparkConnectorIntro")
+ .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.myCollection")
+ .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.myCollection")
+ .getOrCreate();
+
+ // Create a JavaSparkContext using the SparkSession's SparkContext object
+ JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
+
+ // More application logic would go here...
+
+ jsc.close();
+ */
+
+ }
+
+
+}
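
For reference, a completed version of the commented sketch above might look as follows, assuming a Spark 2.x runtime and the mongo-spark-connector dependency declared in the parent pom (the URIs remain the same local placeholders as in the sketch):

    package eu.dnetlib.dhp.wf.importer.mdrecord;

    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.SparkSession;
    import org.bson.Document;

    import com.mongodb.spark.MongoSpark;
    import com.mongodb.spark.rdd.api.java.JavaMongoRDD;

    public class MongoRecordImporterSketch {

        public static void main(String[] args) {
            SparkSession spark = SparkSession.builder()
                    .master("local")
                    .appName("MongoSparkConnectorIntro")
                    .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.myCollection")
                    .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.myCollection")
                    .getOrCreate();

            JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

            // load the collection as an RDD of BSON documents and report its size
            JavaMongoRDD<Document> mongoRDD = MongoSpark.load(jsc);
            System.out.println("documents read: " + mongoRDD.count());

            jsc.close();
        }
    }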
diff --git a/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml
new file mode 100644
index 0000000000..dee61af910
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml
@@ -0,0 +1,124 @@
+
+
+
+
+ mdstore_facade_factory_classname
+ eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory
+ ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade
+
+
+ mdstore_service_location
+ $UNDEFINED$
+ mdstore service location
+
+
+ mdstore_ids_csv
+ $UNDEFINED$
+ comma separated mdstore identifiers
+
+
+ mdstore_record_maxlength
+ 500000
+ maximum allowed length of mdstore record
+
+
+ output
+ ImportedRecord avro datastore output holding mdrecords
+
+
+ output_report_root_path
+ base directory for storing reports
+
+
+ output_report_relative_path
+ import_mdrecord
+ directory for storing report (relative to output_report_root_path)
+
+
+ dnet_service_client_read_timeout
+ 60000
+ DNet service client reading timeout (expressed in milliseconds)
+
+
+ dnet_service_client_connection_timeout
+ 60000
+ DNet service client connection timeout (expressed in milliseconds)
+
+
+ resultset_client_read_timeout
+ 60000
+ result set client reading timeout (expressed in milliseconds)
+
+
+ resultset_client_connection_timeout
+ 60000
+ result set client connection timeout (expressed in milliseconds)
+
+
+ report_properties_prefix
+ import.mdrecord
+ prefix for report entries describing the number of imported records
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.common.java.ProcessWrapper
+ eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter
+
+ -Pimport.mdstore.service.location=${mdstore_service_location}
+ -Pimport.mdstore.ids.csv=${mdstore_ids_csv}
+ -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength}
+
+ -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout}
+ -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout}
+ -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout}
+ -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout}
+
+ -Pimport.facade.factory.classname=${mdstore_facade_factory_classname}
+ -Omdrecords=${output}
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.common.java.ProcessWrapper
+ eu.dnetlib.dhp.common.report.ReportGenerator
+ -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']}
+ -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']}
+ -Oreport=${output_report_root_path}/${output_report_relative_path}
+
+
+
+
+
+
+ Unfortunately, the process failed -- error message:
+ [${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class
new file mode 100644
index 0000000000..5bd02d3603
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class
new file mode 100644
index 0000000000..46f329580a
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class
new file mode 100644
index 0000000000..13a46a67ad
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class
new file mode 100644
index 0000000000..fb8394cb5a
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class
new file mode 100644
index 0000000000..abb39ad860
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class
new file mode 100644
index 0000000000..7689db9d69
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class
new file mode 100644
index 0000000000..ec0403005b
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class
new file mode 100644
index 0000000000..e9e7a2cbdf
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class
new file mode 100644
index 0000000000..03ce390c2d
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class
new file mode 100644
index 0000000000..18e211bf9c
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class
new file mode 100644
index 0000000000..d7663b0dd8
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class
new file mode 100644
index 0000000000..aff40623b0
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class
new file mode 100644
index 0000000000..c9ac132382
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class
new file mode 100644
index 0000000000..cbd0403cbe
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class
new file mode 100644
index 0000000000..b7c9aaee6c
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class
new file mode 100644
index 0000000000..8ffb1d81bb
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class
new file mode 100644
index 0000000000..b6a1ceca93
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class
new file mode 100644
index 0000000000..092c87c701
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class
new file mode 100644
index 0000000000..254a7f6478
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class
new file mode 100644
index 0000000000..5079c03852
Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class differ
diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml
new file mode 100644
index 0000000000..dee61af910
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml
@@ -0,0 +1,124 @@
+
+
+
+
+ mdstore_facade_factory_classname
+ eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory
+ ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade
+
+
+ mdstore_service_location
+ $UNDEFINED$
+ mdstore service location
+
+
+ mdstore_ids_csv
+ $UNDEFINED$
+ comma separated mdstore identifiers
+
+
+ mdstore_record_maxlength
+ 500000
+ maximum allowed length of mdstore record
+
+
+ output
+ ImportedRecord avro datastore output holding mdrecords
+
+
+ output_report_root_path
+ base directory for storing reports
+
+
+ output_report_relative_path
+ import_mdrecord
+ directory for storing report (relative to output_report_root_path)
+
+
+ dnet_service_client_read_timeout
+ 60000
+ DNet service client reading timeout (expressed in milliseconds)
+
+
+ dnet_service_client_connection_timeout
+ 60000
+ DNet service client connection timeout (expressed in milliseconds)
+
+
+ resultset_client_read_timeout
+ 60000
+ result set client reading timeout (expressed in milliseconds)
+
+
+ resultset_client_connection_timeout
+ 60000
+ result set client connection timeout (expressed in milliseconds)
+
+
+ report_properties_prefix
+ import.mdrecord
+ prefix for report entries describing the number of imported records
+
+
+
+
+ ${jobTracker}
+ ${nameNode}
+
+
+ mapreduce.job.queuename
+ ${queueName}
+
+
+ oozie.launcher.mapred.job.queue.name
+ ${oozieLauncherQueueName}
+
+
+
+
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.common.java.ProcessWrapper
+ eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter
+
+ -Pimport.mdstore.service.location=${mdstore_service_location}
+ -Pimport.mdstore.ids.csv=${mdstore_ids_csv}
+ -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength}
+
+ -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout}
+ -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout}
+ -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout}
+ -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout}
+
+ -Pimport.facade.factory.classname=${mdstore_facade_factory_classname}
+ -Omdrecords=${output}
+
+
+
+
+
+
+
+
+ eu.dnetlib.dhp.common.java.ProcessWrapper
+ eu.dnetlib.dhp.common.report.ReportGenerator
+ -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']}
+ -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']}
+ -Oreport=${output_report_root_path}/${output_report_relative_path}
+
+
+
+
+
+
+ Unfortunately, the process failed -- error message:
+ [${wf:errorMessage(wf:lastErrorNode())}]
+
+
+
diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000000..672248f223
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
@@ -0,0 +1,20 @@
+eu/dnetlib/dhp/wf/importer/RecordReceiver.class
+eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class
+eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class
+eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class
+eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class
+eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class
+eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class
+eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class
+eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class
+eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class
+eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class
+eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class
+eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class
+eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class
+eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class
+eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class
+eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class
+eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class
+eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class
+eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class
diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000000..e9820d1d98
--- /dev/null
+++ b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1,20 @@
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java
diff --git a/dhp-wf/pom.xml b/dhp-wf/pom.xml
new file mode 100644
index 0000000000..4c0aa666ac
--- /dev/null
+++ b/dhp-wf/pom.xml
@@ -0,0 +1,249 @@
+
+
+ 4.0.0
+
+
+ eu.dnetlib.dhp
+ dhp
+ 1.0.0-SNAPSHOT
+
+
+ dhp-wf
+ pom
+
+
+ dhp-wf-import
+
+
+
+ yyyy-MM-dd_HH_mm
+
+
+ oozie-package
+
+ src/test/resources/define/path/pointing/to/directory/holding/oozie_app
+ oozie_app
+ default
+ default
+ default
+ primed
+
+ runtime
+
+ true
+
+ ${user.home}/.dhp/application.properties
+
+ ${maven.build.timestamp}
+
+ ${project.version}
+ true
+
+
+
+
+
+
+ org.apache.oozie
+ oozie-client
+
+
+ net.schmizz
+ sshj
+ test
+
+
+
+
+
+ oozie-package
+
+
+
+ org.apache.maven.plugins
+ maven-enforcer-plugin
+ 1.4.1
+
+
+ enforce-connection-properties-file-existence
+ initialize
+
+ enforce
+
+
+
+
+
+ ${dhpConnectionProperties}
+
+
+ The connection properties file could not be found. Please create the ${dhpConnectionProperties} file, or point to an existing file by setting the
+ -DdhpConnectionProperties property.
+
+
+
+ true
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ copy dependencies
+ prepare-package
+
+ copy-dependencies
+
+
+ ${oozie.package.dependencies.include.scope}
+ ${oozie.package.dependencies.exclude.scope}
+ true
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+
+
+ attach-test-resources-package
+ prepare-package
+
+ test-jar
+
+
+ ${oozie.package.skip.test.jar}
+
+
+
+
+
+
+
+ pl.project13.maven
+ git-commit-id-plugin
+ 2.1.11
+
+
+
+ revision
+
+
+
+
+ true
+ yyyy-MM-dd'T'HH:mm:ssZ
+ true
+ target/${oozie.package.file.name}/${oozieAppDir}/version.properties
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+ 3.0.0
+
+
+ assembly-oozie-installer
+ package
+
+ single
+
+
+ false
+ ${oozie.package.file.name}_shell_scripts
+
+ oozie-installer
+
+
+
+
+
+
+
+
+
+ maven-antrun-plugin
+
+
+
+ installer-copy-custom
+ process-resources
+
+ run
+
+
+
+
+
+
+
+
+
+
+ package
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ run
+
+
+
+
+
+
+
+
+
+
+
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000000..8861cbe25f
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,953 @@
+
+
+ 4.0.0
+
+ eu.dnetlib.dhp
+ dhp
+ 1.0.0-SNAPSHOT
+ pom
+
+ http://www.d-net.research-infrastructures.eu
+
+
+
+ The Apache Software License, Version 2.0
+ http://www.apache.org/licenses/LICENSE-2.0.txt
+ repo
+ A business-friendly OSS license
+
+
+
+
+ dhp-build
+ dhp-common
+ dhp-schemas
+ dhp-wf
+
+
+
+ Redmine
+ https://issue.openaire.research-infrastructures.eu/projects/openaire
+
+
+
+ jenkins
+ https://jenkins-dnet.d4science.org/
+
+
+
+ scm:git:ssh://git@github.com/???
+ scm:git:ssh://git@github.com/???.git
+ https://github.com/???
+ HEAD
+
+
+
+
+
+
+
+
+ dnet45-releases
+ D-Net 45 releases
+ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases
+ default
+
+ false
+
+
+ true
+
+
+
+ dnet45-bootstrap-release
+ dnet45 bootstrap release
+ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-bootstrap-release
+ default
+
+ false
+
+
+ true
+
+
+
+
+ cloudera
+ Cloudera Repository
+ https://repository.cloudera.com/artifactory/cloudera-repos
+
+ true
+
+
+ false
+
+
+
+
+
+
+
+ org.slf4j
+ slf4j-api
+ 1.7.22
+
+
+ org.slf4j
+ slf4j-log4j12
+ 1.7.22
+
+
+ log4j
+ log4j
+ 1.2.17
+
+
+ javax.servlet
+ javax.servlet-api
+ 3.1.0
+ runtime
+
+
+ junit
+ junit
+ 4.12
+ test
+
+
+ org.hamcrest
+ hamcrest-all
+ 1.3
+ test
+
+
+ org.mockito
+ mockito-all
+ 1.10.19
+ test
+
+
+ org.powermock
+ powermock-core
+ 1.6.6
+ test
+
+
+ com.google.code.findbugs
+ annotations
+ 3.0.1
+ provided
+
+
+ com.google.code.findbugs
+ jsr305
+ 3.0.1
+ provided
+
+
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-common
+ ${dhp.hadoop.version}
+ provided
+
+
+ servlet-api
+ javax.servlet
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ ${dhp.hadoop.version}
+ provided
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+ ${dhp.hadoop.version}
+ provided
+
+
+
+ org.apache.hadoop
+ hadoop-yarn-common
+ ${dhp.hadoop.version}
+ test
+ test-jar
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+ ${dhp.hadoop.version}
+ test
+ test-jar
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-app
+ ${dhp.hadoop.version}
+ provided
+
+
+ servlet-api
+ javax.servlet
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+ ${dhp.hadoop.version}
+ provided
+
+
+ servlet-api
+ javax.servlet
+
+
+
+ org.apache.calcite
+ calcite-core
+
+
+ org.apache.calcite
+ calcite-avatica
+
+
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${dhp.hadoop.version}
+
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${dhp.hadoop.version}
+ test-jar
+ test
+
+
+
+
+
+ org.apache.oozie
+ oozie-core
+ ${dhp.oozie.version}
+
+
+
+ servlet-api
+ javax.servlet
+
+
+
+ org.apache.calcite
+ calcite-core
+
+
+ org.apache.calcite
+ calcite-avatica
+
+
+
+
+
+ org.apache.oozie
+ oozie-client
+ ${dhp.oozie.version}
+
+
+
+ slf4j-simple
+ org.slf4j
+
+
+
+
+
+ org.apache.spark
+ spark-core_2.10
+ ${dhp.spark.version}
+ provided
+
+
+
+ org.apache.spark
+ spark-sql_2.10
+ ${dhp.spark.version}
+ provided
+
+
+
+ com.databricks
+ spark-avro_2.10
+ 1.1.0-${dhp.cdh.version}
+
+
+
+ com.databricks
+ spark-csv_2.10
+ 1.5.0
+
+
+
+ pl.edu.icm.spark-utils
+ spark-utils
+ 1.0.0
+
+
+
+ org.mongodb.spark
+ mongo-spark-connector_2.10
+ 2.2.1
+ provided
+
+
+
+
+
+ org.apache.avro
+ avro
+ ${dhp.avro.version}
+
+
+
+ org.apache.avro
+ avro-mapred
+ ${dhp.avro.version}
+ hadoop2
+
+
+
+ servlet-api
+ org.mortbay.jetty
+
+
+ netty
+ io.netty
+
+
+
+
+
+
+
+ org.apache.pig
+ pig
+ ${dhp.pig.version}
+ provided
+
+
+ org.mortbay.jetty
+ servlet-api-2.5
+
+
+ servlet-api
+ javax.servlet
+
+
+
+
+
+ org.apache.pig
+ piggybank
+ ${dhp.pig.version}
+ provided
+
+
+
+ org.apache.pig
+ pigunit
+ ${dhp.pig.version}
+
+
+
+
+ pl.edu.icm.cermine
+ cermine-impl
+ 1.13
+
+
+
+
+ pl.edu.icm.coansys
+ models
+ ${dhp.coansys.version}
+
+
+
+ pl.edu.icm.coansys
+ citation-matching-core-code
+ ${dhp.coansys.version}
+
+
+
+ org.apache.avro
+ avro-mapred
+
+
+ org.apache.hadoop
+ hadoop-client
+
+
+ org.apache.hadoop
+ hadoop-yarn-api
+
+
+ org.apache.hadoop
+ hadoop-yarn-client
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-common
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-app
+
+
+ org.apache.hadoop
+ hadoop-mapreduce-client-core
+
+
+
+ org.slf4j
+ slf4j-simple
+
+
+
+
+
+
+
+ eu.dnetlib
+ dnet-openaireplus-mapping-utils
+ [6.0.0, 7.0.0)
+
+
+
+ org.apache.hadoop
+ hadoop-core
+
+
+ org.apache.hadoop
+ hadoop-hdfs
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ jgrapht
+ jgrapht
+
+
+
+
+
+ eu.dnetlib
+ dnet-objectstore-rmi
+ [2.0.0, 3.0.0)
+
+
+
+ eu.dnetlib
+ cnr-rmi-api
+ [2.0.0, 3.0.0)
+
+
+
+ eu.dnetlib
+ cnr-resultset-client
+ [2.0.0, 3.0.0)
+
+
+
+ org.springframework
+ spring-web
+
+
+ org.springframework
+ spring-webmvc
+
+
+
+
+
+ eu.dnetlib
+ dnet-actionmanager-common
+ [6.0.0, 7.0.0)
+
+
+
+ apache
+ commons-logging
+
+
+
+
+
+ eu.dnetlib
+ cnr-service-utils
+ [1.0.0, 2.0.0)
+
+
+
+
+ com.beust
+ jcommander
+ 1.60
+
+
+
+ com.google.code.gson
+ gson
+ 2.8.0
+
+
+
+ com.googlecode.json-simple
+ json-simple
+ 1.1.1
+
+
+
+ org.apache.commons
+ commons-lang3
+ 3.5
+
+
+
+ org.apache.commons
+ commons-collections4
+ 4.1
+
+
+
+ com.thoughtworks.xstream
+ xstream
+ 1.4.9
+
+
+
+ xalan
+ xalan
+ 2.7.2
+
+
+
+ xml-apis
+ xml-apis
+ 1.4.01
+
+
+
+ org.jdom
+ jdom
+ 1.1.3
+
+
+
+ org.jsoup
+ jsoup
+ 1.10.2
+
+
+
+ net.sf.opencsv
+ opencsv
+ 2.3
+
+
+
+ com.googlecode.protobuf-java-format
+ protobuf-java-format
+ 1.2
+
+
+
+ com.google.protobuf
+ protobuf-java
+ 2.5.0
+
+
+
+
+ com.google.guava
+ guava
+ 12.0
+
+
+
+ commons-cli
+ commons-cli
+ 1.3.1
+
+
+
+ commons-io
+ commons-io
+ 2.5
+
+
+
+ de.sven-jacobs
+ loremipsum
+ 1.0
+
+
+
+ net.schmizz
+ sshj
+ 0.10.0
+
+
+
+
+ org.bouncycastle
+ bcprov-jdk15on
+ 1.50
+
+
+
+
+ com.thoughtworks.paranamer
+ paranamer
+ 2.8
+
+
+
+
+ com.linkedin.datafu
+ datafu
+ 1.2.0
+
+
+
+ org.apache.cxf
+ cxf-rt-frontend-jaxws
+ ${cxf.version}
+
+
+
+
+ com.sun.xml.bind
+ jaxb-impl
+ 2.2.7
+ runtime
+
+
+
+ org.springframework
+ spring-beans
+ ${spring.version}
+
+
+
+ org.springframework
+ spring-context
+ ${spring.version}
+
+
+
+ org.scala-lang
+ scala-library
+ ${scala.version}
+
+
+
+ commons-beanutils
+ commons-beanutils
+ 1.9.3
+
+
+
+ org.apache.curator
+ curator-test
+ 3.3.0
+ test
+
+
+
+
+
+
+
+ target
+ target/classes
+ ${project.artifactId}-${project.version}
+ target/test-classes
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.6.0
+
+
+ 1.8
+ ${project.build.sourceEncoding}
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 3.0.2
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ 3.0.1
+
+
+ attach-sources
+ verify
+
+ jar-no-fork
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+ 2.19.1
+
+ true
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ 2.10.4
+
+ true
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ 3.0.0
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ 3.2.2
+
+
+ scala-compile-first
+ process-resources
+
+ add-source
+ compile
+
+
+
+ scala-test-compile
+ process-test-resources
+
+ testCompile
+
+
+
+
+
+
+
+ org.apache.avro
+ avro-maven-plugin
+ 1.7.7
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+ 1.12
+
+
+
+
+ org.eclipse.m2e
+ lifecycle-mapping
+ 1.0.0
+
+
+
+
+
+
+ org.apache.avro
+
+
+ avro-maven-plugin
+
+
+ [1.7.4,)
+
+
+ idl-protocol
+ schema
+
+
+
+
+
+
+
+
+
+ org.codehaus.mojo
+
+
+ build-helper-maven-plugin
+
+
+ [1.7,)
+
+
+ add-source
+
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+
+
+ maven-plugin-plugin
+
+
+ [3.2,)
+
+
+ descriptor
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-release-plugin
+ 2.5.3
+
+
+
+ org.jacoco
+ jacoco-maven-plugin
+ 0.7.9
+
+
+ **/schemas/*
+ **/com/cloudera/**/*
+ **/org/apache/avro/io/**/*
+
+
+
+
+ default-prepare-agent
+
+ prepare-agent
+
+
+
+ default-report
+ prepare-package
+
+ report
+
+
+
+
+
+
+
+
+ org.apache.maven.wagon
+ wagon-ssh
+ 2.10
+
+
+
+
+
+
+
+
+ dnet45-snapshots
+ DNet45 Snapshots
+ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots
+ default
+
+
+ dnet45-releases
+ http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ 2.10.4
+
+ true
+
+
+
+
+
+
+ UTF-8
+ UTF-8
+ [4.2.5.RELEASE]
+ [3.1.5]
+
+ cdh5.9.0
+
+ 4.1.0-${dhp.cdh.version}
+ 0.12.0-${dhp.cdh.version}
+ 1.7.6-${dhp.cdh.version}
+ 2.6.0-${dhp.cdh.version}
+ 1.6.0-${dhp.cdh.version}
+ 2.10.6
+
+
+