diff --git a/README.md b/README.md new file mode 100644 index 0000000000..1a13ebf052 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# dnet-hadoop diff --git a/dhp-build/dhp-build-assembly-resources/README.markdown b/dhp-build/dhp-build-assembly-resources/README.markdown new file mode 100644 index 0000000000..efee5fa457 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/README.markdown @@ -0,0 +1,7 @@ +Module utilized by `dhp-wf`. + +Contains all required resources by this parent module: + +* assembly XML definitions +* build shell scripts +* oozie package commands for uploading, running and monitoring oozie workflows diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml new file mode 100644 index 0000000000..2d2543505f --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/pom.xml @@ -0,0 +1,24 @@ + + + + 4.0.0 + + + eu.dnetlib.dhp + dhp-build + 1.0.0-SNAPSHOT + + + dhp-build-assembly-resources + jar + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + + + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml new file mode 100644 index 0000000000..1419c5b1c3 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml @@ -0,0 +1,32 @@ + + + oozie-installer + + dir + + + + + true + ${project.build.directory}/assembly-resources/commands + + / + + **/* + + 0755 + unix + + + / + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml new file mode 100644 index 0000000000..bf679e6529 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml @@ -0,0 +1,24 @@ + + + tests + + jar + + false + + + ${project.build.testOutputDirectory} + + + + + + \ No newline at end of file diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh new file mode 100644 index 0000000000..e9d55f0d7e --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh @@ -0,0 +1,3 @@ +#!/bin/bash +hadoop fs -get ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh new file mode 100644 index 0000000000..c79839ea49 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh @@ -0,0 +1,5 @@ +#!/bin/bash +echo "" +echo "---->Contents of the working directory" +hadoop fs -ls ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown new file mode 100644 index 0000000000..3e049c18b7 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown @@ -0,0 +1,5 @@ +Execute the scripts in the following order: + +1. `upload_workflow.sh` +2. `run_workflow.sh` +3. 
`print_working_dir.sh` or `get_working_dir.sh` diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh new file mode 100644 index 0000000000..fee3d77370 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [ $# = 0 ] ; then + oozie job -oozie ${oozieServiceLoc} -config job.properties -run +else + oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run +fi + + + diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh new file mode 100644 index 0000000000..c5d299c2f0 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh @@ -0,0 +1,34 @@ +#!/bin/bash +exec 3>&1 +BASH_XTRACEFD=3 +set -x ## print every executed command + + +if [ $# = 0 ] ; then + target_dir_root=`pwd`'/${oozieAppDir}' +else + target_dir_root=`readlink -f $1`'/${oozieAppDir}' +fi + +# initial phase, creating symbolic links to jars in all subworkflows +# currently disabled +#libDir=$target_dir_root'/lib' +#dirs=`find $target_dir_root/* -maxdepth 10 -type d` +#for dir in $dirs +#do +# if [ -f $dir/workflow.xml ] +# then +# echo "creating symbolic links to jars in directory: $dir/lib" +# if [ ! -d "$dir/lib" ]; then +# mkdir $dir/lib +# fi +# find $libDir -type f -exec ln -s \{\} $dir/lib \; +# fi +#done + + +#uploading +hadoop fs -rm -r ${sandboxDir} +hadoop fs -mkdir -p ${sandboxDir} +hadoop fs -mkdir -p ${workingDir} +hadoop fs -put $target_dir_root ${sandboxDir} diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties new file mode 100644 index 0000000000..021ecf55be --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties @@ -0,0 +1,7 @@ +#sandboxName when not provided explicitly will be generated +sandboxName=${sandboxName} +sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName} +workingDir=${sandboxDir}/working_dir +oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} +oozieTopWfApplicationPath = ${oozie.wf.application.path} + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml new file mode 100644 index 0000000000..1419c5b1c3 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml @@ -0,0 +1,32 @@ + + + oozie-installer + + dir + + + + + true + ${project.build.directory}/assembly-resources/commands + + / + + **/* + + 0755 + unix + + + / + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml new file mode 100644 index 0000000000..bf679e6529 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml @@ -0,0 +1,24 @@ + + + tests + + jar + + false + + + ${project.build.testOutputDirectory} + + + + + + \ No newline at end of file diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh new 
file mode 100644 index 0000000000..e9d55f0d7e --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh @@ -0,0 +1,3 @@ +#!/bin/bash +hadoop fs -get ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh new file mode 100644 index 0000000000..c79839ea49 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh @@ -0,0 +1,5 @@ +#!/bin/bash +echo "" +echo "---->Contents of the working directory" +hadoop fs -ls ${workingDir} + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown new file mode 100644 index 0000000000..3e049c18b7 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown @@ -0,0 +1,5 @@ +Execute the scripts in the following order: + +1. `upload_workflow.sh` +2. `run_workflow.sh` +3. `print_working_dir.sh` or `get_working_dir.sh` diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh new file mode 100644 index 0000000000..fee3d77370 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [ $# = 0 ] ; then + oozie job -oozie ${oozieServiceLoc} -config job.properties -run +else + oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run +fi + + + diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh new file mode 100644 index 0000000000..c5d299c2f0 --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh @@ -0,0 +1,34 @@ +#!/bin/bash +exec 3>&1 +BASH_XTRACEFD=3 +set -x ## print every executed command + + +if [ $# = 0 ] ; then + target_dir_root=`pwd`'/${oozieAppDir}' +else + target_dir_root=`readlink -f $1`'/${oozieAppDir}' +fi + +# initial phase, creating symbolic links to jars in all subworkflows +# currently disabled +#libDir=$target_dir_root'/lib' +#dirs=`find $target_dir_root/* -maxdepth 10 -type d` +#for dir in $dirs +#do +# if [ -f $dir/workflow.xml ] +# then +# echo "creating symbolic links to jars in directory: $dir/lib" +# if [ ! 
-d "$dir/lib" ]; then +# mkdir $dir/lib +# fi +# find $libDir -type f -exec ln -s \{\} $dir/lib \; +# fi +#done + + +#uploading +hadoop fs -rm -r ${sandboxDir} +hadoop fs -mkdir -p ${sandboxDir} +hadoop fs -mkdir -p ${workingDir} +hadoop fs -put $target_dir_root ${sandboxDir} diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties b/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties new file mode 100644 index 0000000000..021ecf55be --- /dev/null +++ b/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties @@ -0,0 +1,7 @@ +#sandboxName when not provided explicitly will be generated +sandboxName=${sandboxName} +sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName} +workingDir=${sandboxDir}/working_dir +oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} +oozieTopWfApplicationPath = ${oozie.wf.application.path} + diff --git a/dhp-build/dhp-build-properties-maven-plugin/README.markdown b/dhp-build/dhp-build-properties-maven-plugin/README.markdown new file mode 100644 index 0000000000..f99c7c1b03 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/README.markdown @@ -0,0 +1,6 @@ +Maven plugin module utilized by `dhp-wf` for proper `job.properties` file building. + +It is based on http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html and supplemented with: + +* handling includePropertyKeysFromFiles property allowing writing only properties listed in given property files +As a final outcome only properties listed in `` element and listed as a keys in files from `` element will be written to output file. diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml new file mode 100644 index 0000000000..38093f4d18 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/pom.xml @@ -0,0 +1,68 @@ + + + + 4.0.0 + + + eu.dnetlib.dhp + dhp-build + 1.0.0-SNAPSHOT + + + dhp-build-properties-maven-plugin + maven-plugin + + + + + org.apache.maven + maven-plugin-api + 2.0 + + + org.apache.maven + maven-project + 2.0 + + + org.kuali.maven.plugins + properties-maven-plugin + 1.3.2 + + + + + + target + target/classes + ${project.artifactId}-${project.version} + target/test-classes + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + verify + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + + + + + + diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java new file mode 100644 index 0000000000..a3a99cc0c6 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java @@ -0,0 +1,71 @@ +package eu.dnetlib.maven.plugin.properties; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.maven.plugin.AbstractMojo; +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.plugin.MojoFailureException; + +/** + * Generates oozie properties which were not provided from commandline. 
+ * @author mhorst + * + * @goal generate-properties + */ +public class GenerateOoziePropertiesMojo extends AbstractMojo { + + public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir"; + public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName"; + + private final String[] limiters = {"iis", "dnetlib", "eu", "dhp"}; + + @Override + public void execute() throws MojoExecutionException, MojoFailureException { + if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && + !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { + String generatedSandboxName = generateSandboxName(System.getProperties().getProperty( + PROPERTY_NAME_WF_SOURCE_DIR)); + if (generatedSandboxName!=null) { + System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME, + generatedSandboxName); + } else { + System.out.println("unable to generate sandbox name from path: " + + System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR)); + } + } + } + + /** + * Generates sandbox name from workflow source directory. + * @param wfSourceDir + * @return generated sandbox name + */ + private String generateSandboxName(String wfSourceDir) { +// utilize all dir names until finding one of the limiters + List sandboxNameParts = new ArrayList(); + String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); + ArrayUtils.reverse(tokens); + if (tokens.length>0) { + for (String token : tokens) { + for (String limiter : limiters) { + if (limiter.equals(token)) { + return sandboxNameParts.size()>0? + StringUtils.join(sandboxNameParts.toArray()):null; + } + } + if (sandboxNameParts.size()>0) { + sandboxNameParts.add(0, File.separator); + } + sandboxNameParts.add(0, token); + } + return StringUtils.join(sandboxNameParts.toArray()); + } else { + return null; + } + } + +} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java new file mode 100644 index 0000000000..62f04761a9 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java @@ -0,0 +1,436 @@ +/** + * + * Licensed under the Educational Community License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.opensource.org/licenses/ecl2.php + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package eu.dnetlib.maven.plugin.properties; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.maven.plugin.AbstractMojo; +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.plugin.MojoFailureException; +import org.apache.maven.project.MavenProject; +import org.springframework.core.io.DefaultResourceLoader; +import org.springframework.core.io.Resource; +import org.springframework.core.io.ResourceLoader; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; + +/** + * Writes project properties for the keys listed in specified properties files. + * Based on: + * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html + + * @author mhorst + * @goal write-project-properties + */ +public class WritePredefinedProjectProperties extends AbstractMojo { + + private static final String CR = "\r"; + private static final String LF = "\n"; + private static final String TAB = "\t"; + protected static final String PROPERTY_PREFIX_ENV = "env."; + private static final String ENCODING_UTF8 = "utf8"; + + /** + * @parameter property="properties.includePropertyKeysFromFiles" + */ + private String[] includePropertyKeysFromFiles; + + /** + * @parameter default-value="${project}" + * @required + * @readonly + */ + protected MavenProject project; + + /** + * The file that properties will be written to + * + * @parameter property="properties.outputFile" + * default-value="${project.build.directory}/properties/project.properties"; + * @required + */ + protected File outputFile; + + /** + * If true, the plugin will silently ignore any non-existent properties files, and the build will continue + * + * @parameter property="properties.quiet" default-value="true" + */ + private boolean quiet; + + /** + * Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, + * tab=tab. Any other values are taken literally. + * + * @parameter default-value="cr,lf,tab" property="properties.escapeChars" + */ + private String escapeChars; + + /** + * If true, the plugin will include system properties when writing the properties file. System properties override + * both environment variables and project properties. + * + * @parameter default-value="false" property="properties.includeSystemProperties" + */ + private boolean includeSystemProperties; + + /** + * If true, the plugin will include environment variables when writing the properties file. Environment variables + * are prefixed with "env". Environment variables override project properties. + * + * @parameter default-value="false" property="properties.includeEnvironmentVariables" + */ + private boolean includeEnvironmentVariables; + + /** + * Comma separated set of properties to exclude when writing the properties file + * + * @parameter property="properties.exclude" + */ + private String exclude; + + /** + * Comma separated set of properties to write to the properties file. If provided, only the properties matching + * those supplied here will be written to the properties file. 
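The mojo's parameters can also be exercised programmatically, which is how the unit tests further down in this diff drive it. The sketch below mirrors that same-package test setup; the property keys (`db.url`, `db.password`) and values are hypothetical.

```java
package eu.dnetlib.maven.plugin.properties;

import java.io.File;
import org.apache.maven.model.Model;
import org.apache.maven.project.MavenProject;

// Minimal same-package sketch, mirroring the plugin's test setup.
public class WritePropertiesDemo {
    public static void main(String[] args) throws Exception {
        WritePredefinedProjectProperties mojo = new WritePredefinedProjectProperties();
        MavenProject project = new MavenProject(new Model());
        project.getProperties().setProperty("db.url", "jdbc:postgresql://localhost/db");
        project.getProperties().setProperty("db.password", "secret");

        mojo.project = project;                                   // protected field, same package
        mojo.outputFile = new File("target/project.properties");  // destination file
        mojo.setInclude("db.url");       // keep only this key
        mojo.setExclude("db.password");  // drop this key even if otherwise included
        mojo.setEscapeChars("cr,lf,tab");
        mojo.setQuiet(true);             // tolerate missing key-source files
        mojo.execute();                  // writes db.url=... to the output file
    }
}
```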
+ * + * @parameter property="properties.include" + */ + private String include; + + /* (non-Javadoc) + * @see org.apache.maven.plugin.AbstractMojo#execute() + */ + @Override + @SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"}) + public void execute() throws MojoExecutionException, MojoFailureException { + Properties properties = new Properties(); + // Add project properties + properties.putAll(project.getProperties()); + if (includeEnvironmentVariables) { + // Add environment variables, overriding any existing properties with the same key + properties.putAll(getEnvironmentVariables()); + } + if (includeSystemProperties) { + // Add system properties, overriding any existing properties with the same key + properties.putAll(System.getProperties()); + } + + // Remove properties as appropriate + trim(properties, exclude, include); + + String comment = "# " + new Date() + "\n"; + List escapeTokens = getEscapeChars(escapeChars); + + getLog().info("Creating " + outputFile); + writeProperties(outputFile, comment, properties, escapeTokens); + } + + /** + * Provides environment variables. + * @return environment variables + */ + protected static Properties getEnvironmentVariables() { + Properties props = new Properties(); + for (Entry entry : System.getenv().entrySet()) { + props.setProperty(PROPERTY_PREFIX_ENV + entry.getKey(), entry.getValue()); + } + return props; + } + + /** + * Removes properties which should not be written. + * @param properties + * @param omitCSV + * @param includeCSV + * @throws MojoExecutionException + */ + protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException { + List omitKeys = getListFromCSV(omitCSV); + for (String key : omitKeys) { + properties.remove(key); + } + + List includeKeys = getListFromCSV(includeCSV); +// mh: including keys from predefined properties + if (includePropertyKeysFromFiles!=null && includePropertyKeysFromFiles.length>0) { + for (String currentIncludeLoc : includePropertyKeysFromFiles) { + if (validate(currentIncludeLoc)) { + Properties p = getProperties(currentIncludeLoc); + for (String key : p.stringPropertyNames()) { + includeKeys.add(key); + } + } + } + } + if (includeKeys!=null && !includeKeys.isEmpty()) { +// removing only when include keys provided + Set keys = properties.stringPropertyNames(); + for (String key : keys) { + if (!includeKeys.contains(key)) { + properties.remove(key); + } + } + } + } + + /** + * Checks whether file exists. + * @param location + * @return true when exists, false otherwise. + */ + protected boolean exists(String location) { + if (StringUtils.isBlank(location)) { + return false; + } + File file = new File(location); + if (file.exists()) { + return true; + } + ResourceLoader loader = new DefaultResourceLoader(); + Resource resource = loader.getResource(location); + return resource.exists(); + } + + /** + * Validates resource location. + * @param location + * @return true when valid, false otherwise + * @throws MojoExecutionException + */ + protected boolean validate(String location) throws MojoExecutionException { + boolean exists = exists(location); + if (exists) { + return true; + } + if (quiet) { + getLog().info("Ignoring non-existent properties file '" + location + "'"); + return false; + } else { + throw new MojoExecutionException("Non-existent properties file '" + location + "'"); + } + } + + /** + * Provides input stream. 
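To recap the `execute()` flow shown above: the three property sources are merged with plain `putAll` calls, so later sources silently win on duplicate keys. A standalone sketch of that merge order, using a hypothetical key:

```java
import java.util.Map;
import java.util.Properties;

// Later putAll calls overwrite duplicate keys, so system properties override
// environment variables, which in turn override project properties.
public class MergeOrderDemo {
    public static void main(String[] args) {
        Properties merged = new Properties();

        Properties projectProperties = new Properties();
        projectProperties.setProperty("someKey", "fromProject");   // hypothetical key
        merged.putAll(projectProperties);                          // lowest precedence

        for (Map.Entry<String, String> e : System.getenv().entrySet()) {
            merged.setProperty("env." + e.getKey(), e.getValue()); // as getEnvironmentVariables()
        }
        merged.putAll(System.getProperties());                     // highest precedence

        System.out.println(merged.getProperty("someKey"));         // fromProject, unless overridden
    }
}
```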
+ * @param location + * @return input stream + * @throws IOException + */ + protected InputStream getInputStream(String location) throws IOException { + File file = new File(location); + if (file.exists()) { + return new FileInputStream(location); + } + ResourceLoader loader = new DefaultResourceLoader(); + Resource resource = loader.getResource(location); + return resource.getInputStream(); + } + + /** + * Creates properties for given location. + * @param location + * @return properties for given location + * @throws MojoExecutionException + */ + protected Properties getProperties(String location) throws MojoExecutionException { + InputStream in = null; + try { + Properties properties = new Properties(); + in = getInputStream(location); + if (location.toLowerCase().endsWith(".xml")) { + properties.loadFromXML(in); + } else { + properties.load(in); + } + return properties; + } catch (IOException e) { + throw new MojoExecutionException("Error reading properties file " + location, e); + } finally { + IOUtils.closeQuietly(in); + } + } + + /** + * Provides escape characters. + * @param escapeChars + * @return escape characters + */ + protected List getEscapeChars(String escapeChars) { + List tokens = getListFromCSV(escapeChars); + List realTokens = new ArrayList(); + for (String token : tokens) { + String realToken = getRealToken(token); + realTokens.add(realToken); + } + return realTokens; + } + + /** + * Provides real token. + * @param token + * @return real token + */ + protected String getRealToken(String token) { + if (token.equalsIgnoreCase("CR")) { + return CR; + } else if (token.equalsIgnoreCase("LF")) { + return LF; + } else if (token.equalsIgnoreCase("TAB")) { + return TAB; + } else { + return token; + } + } + + /** + * Returns content. + * @param comment + * @param properties + * @param escapeTokens + * @return content + */ + protected String getContent(String comment, Properties properties, List escapeTokens) { + List names = new ArrayList(properties.stringPropertyNames()); + Collections.sort(names); + StringBuilder sb = new StringBuilder(); + if (!StringUtils.isBlank(comment)) { + sb.append(comment); + } + for (String name : names) { + String value = properties.getProperty(name); + String escapedValue = escape(value, escapeTokens); + sb.append(name + "=" + escapedValue + "\n"); + } + return sb.toString(); + } + + /** + * Writes properties to given file. + * @param file + * @param comment + * @param properties + * @param escapeTokens + * @throws MojoExecutionException + */ + protected void writeProperties(File file, String comment, Properties properties, List escapeTokens) + throws MojoExecutionException { + try { + String content = getContent(comment, properties, escapeTokens); + FileUtils.writeStringToFile(file, content, ENCODING_UTF8); + } catch (IOException e) { + throw new MojoExecutionException("Error creating properties file", e); + } + } + + /** + * Escapes characters. + * @param s + * @param escapeChars + * @return + */ + protected String escape(String s, List escapeChars) { + String result = s; + for (String escapeChar : escapeChars) { + result = result.replace(escapeChar, getReplacementToken(escapeChar)); + } + return result; + } + + /** + * Provides replacement token. 
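The escape pipeline around this point is easiest to see with a concrete value. The sketch below reproduces the token resolution of `getEscapeChars("cr,lf,tab")` and the replacements of `getReplacementToken` inline, so it runs standalone; the key name is hypothetical.

```java
import java.util.Arrays;
import java.util.List;

// Standalone sketch of the escaping pipeline: control characters in a value
// are swapped for two-character escape sequences before being written out.
public class EscapeDemo {
    public static void main(String[] args) {
        List<String> escapeTokens = Arrays.asList("\r", "\n", "\t"); // getEscapeChars("cr,lf,tab")
        String value = "line1\nline2\tend";
        String escaped = value;
        for (String token : escapeTokens) {
            String replacement = token.equals("\r") ? "\\r"
                    : token.equals("\n") ? "\\n" : "\\t";            // as getReplacementToken()
            escaped = escaped.replace(token, replacement);
        }
        System.out.println("someKey=" + escaped); // someKey=line1\nline2\tend (one physical line)
    }
}
```

This is what keeps multi-line property values on a single physical line in the generated `project.properties`.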
+ * @param escapeChar + * @return replacement token + */ + protected String getReplacementToken(String escapeChar) { + if (escapeChar.equals(CR)) { + return "\\r"; + } else if (escapeChar.equals(LF)) { + return "\\n"; + } else if (escapeChar.equals(TAB)) { + return "\\t"; + } else { + return "\\" + escapeChar; + } + } + + /** + * Returns list from csv. + * @param csv + * @return list of values generated from CSV + */ + protected static final List getListFromCSV(String csv) { + if (StringUtils.isBlank(csv)) { + return new ArrayList(); + } + List list = new ArrayList(); + String[] tokens = StringUtils.split(csv, ","); + for (String token : tokens) { + list.add(token.trim()); + } + return list; + } + + public void setIncludeSystemProperties(boolean includeSystemProperties) { + this.includeSystemProperties = includeSystemProperties; + } + + public void setEscapeChars(String escapeChars) { + this.escapeChars = escapeChars; + } + + public void setIncludeEnvironmentVariables(boolean includeEnvironmentVariables) { + this.includeEnvironmentVariables = includeEnvironmentVariables; + } + + public void setExclude(String exclude) { + this.exclude = exclude; + } + + public void setInclude(String include) { + this.include = include; + } + + public void setQuiet(boolean quiet) { + this.quiet = quiet; + } + + /** + * Sets property files for which keys properties should be included. + * @param includePropertyKeysFromFiles + */ + public void setIncludePropertyKeysFromFiles( + String[] includePropertyKeysFromFiles) { + if (includePropertyKeysFromFiles!=null) { + this.includePropertyKeysFromFiles = Arrays.copyOf( + includePropertyKeysFromFiles, + includePropertyKeysFromFiles.length); + } + } + +} \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java new file mode 100644 index 0000000000..8a763c1bdd --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java @@ -0,0 +1,101 @@ +package eu.dnetlib.maven.plugin.properties; + +import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME; +import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import org.junit.Before; +import org.junit.Test; + +/** + * @author mhorst + * + */ +public class GenerateOoziePropertiesMojoTest { + + private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); + + @Before + public void clearSystemProperties() { + System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); + System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); + } + + @Test + public void testExecuteEmpty() throws Exception { + // execute + mojo.execute(); + + // assert + assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteSandboxNameAlreadySet() throws Exception { + // given + String workflowSourceDir = "eu/dnetlib/iis/wf/transformers"; + String sandboxName = "originalSandboxName"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + System.setProperty(PROPERTY_NAME_SANDBOX_NAME, sandboxName); + + // execute + mojo.execute(); + + // assert + assertEquals(sandboxName, 
System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteEmptyWorkflowSourceDir() throws Exception { + // given + String workflowSourceDir = ""; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteNullSandboxNameGenerated() throws Exception { + // given + String workflowSourceDir = "eu/dnetlib/iis/"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecute() throws Exception { + // given + String workflowSourceDir = "eu/dnetlib/iis/wf/transformers"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + + @Test + public void testExecuteWithoutRoot() throws Exception { + // given + String workflowSourceDir = "wf/transformers"; + System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); + + // execute + mojo.execute(); + + // assert + assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); + } + +} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java new file mode 100644 index 0000000000..51d9575ffd --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -0,0 +1,365 @@ +package eu.dnetlib.maven.plugin.properties; + +import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.doReturn; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.util.Properties; + +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.project.MavenProject; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + + +/** + * @author mhorst + * + */ +@RunWith(MockitoJUnitRunner.class) +public class WritePredefinedProjectPropertiesTest { + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + @Mock + private MavenProject mavenProject; + + private WritePredefinedProjectProperties mojo; + + @Before + public void init() { + mojo = new WritePredefinedProjectProperties(); + mojo.outputFile = getPropertiesFileLocation(); + mojo.project = mavenProject; + doReturn(new Properties()).when(mavenProject).getProperties(); + } + + // ----------------------------------- TESTS --------------------------------------------- + + @Test + public void testExecuteEmpty() throws Exception { + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = 
getStoredProperties(); + assertEquals(0, storedProperties.size()); + } + + @Test + public void testExecuteWithProjectProperties() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + doReturn(projectProperties).when(mavenProject).getProperties(); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(key)); + assertEquals(value, storedProperties.getProperty(key)); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteWithProjectPropertiesAndInvalidOutputFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + doReturn(projectProperties).when(mavenProject).getProperties(); + mojo.outputFile = testFolder.getRoot(); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteWithProjectPropertiesExclusion() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String excludedKey = "excludedPropertyKey"; + String excludedValue = "excludedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(excludedKey, excludedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + mojo.setExclude(excludedKey); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(key)); + assertEquals(value, storedProperties.getProperty(key)); + } + + @Test + public void testExecuteWithProjectPropertiesInclusion() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + mojo.setInclude(includedKey); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test + public void testExecuteIncludingPropertyKeysFromFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + File includedPropertiesFile = new File(testFolder.getRoot(), "included.properties"); + Properties includedProperties = new Properties(); + includedProperties.setProperty(includedKey, "irrelevantValue"); + 
includedProperties.store(new FileWriter(includedPropertiesFile), null); + + mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test + public void testExecuteIncludingPropertyKeysFromClasspathResource() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteIncludingPropertyKeysFromBlankLocation() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + mojo.setIncludePropertyKeysFromFiles(new String[] {""}); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteIncludingPropertyKeysFromXmlFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml"); + Properties includedProperties = new Properties(); + includedProperties.setProperty(includedKey, "irrelevantValue"); + includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null); + + mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(1, storedProperties.size()); + assertTrue(storedProperties.containsKey(includedKey)); + assertEquals(includedValue, storedProperties.getProperty(includedKey)); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteIncludingPropertyKeysFromInvalidXmlFile() throws Exception { + // given + String key = "projectPropertyKey"; + String value = "projectPropertyValue"; + String includedKey = "includedPropertyKey"; + 
String includedValue = "includedPropertyValue"; + Properties projectProperties = new Properties(); + projectProperties.setProperty(key, value); + projectProperties.setProperty(includedKey, includedValue); + doReturn(projectProperties).when(mavenProject).getProperties(); + + File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml"); + Properties includedProperties = new Properties(); + includedProperties.setProperty(includedKey, "irrelevantValue"); + includedProperties.store(new FileOutputStream(includedPropertiesFile), null); + + mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteWithQuietModeOn() throws Exception { + // given + mojo.setQuiet(true); + mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"}); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertEquals(0, storedProperties.size()); + } + + @Test(expected=MojoExecutionException.class) + public void testExecuteIncludingPropertyKeysFromInvalidFile() throws Exception { + // given + mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"}); + + // execute + mojo.execute(); + } + + @Test + public void testExecuteWithEnvironmentProperties() throws Exception { + // given + mojo.setIncludeEnvironmentVariables(true); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertTrue(storedProperties.size() > 0); + for (Object currentKey : storedProperties.keySet()) { + assertTrue(((String)currentKey).startsWith(PROPERTY_PREFIX_ENV)); + } + } + + @Test + public void testExecuteWithSystemProperties() throws Exception { + // given + String key = "systemPropertyKey"; + String value = "systemPropertyValue"; + System.setProperty(key, value); + mojo.setIncludeSystemProperties(true); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertTrue(storedProperties.size() > 0); + assertTrue(storedProperties.containsKey(key)); + assertEquals(value, storedProperties.getProperty(key)); + } + + @Test + public void testExecuteWithSystemPropertiesAndEscapeChars() throws Exception { + // given + String key = "systemPropertyKey "; + String value = "systemPropertyValue"; + System.setProperty(key, value); + mojo.setIncludeSystemProperties(true); + String escapeChars = "cr,lf,tab,|"; + mojo.setEscapeChars(escapeChars); + + // execute + mojo.execute(); + + // assert + assertTrue(mojo.outputFile.exists()); + Properties storedProperties = getStoredProperties(); + assertTrue(storedProperties.size() > 0); + assertFalse(storedProperties.containsKey(key)); + assertTrue(storedProperties.containsKey(key.trim())); + assertEquals(value, storedProperties.getProperty(key.trim())); + } + + // ----------------------------------- PRIVATE ------------------------------------------- + + private File getPropertiesFileLocation() { + return new File(testFolder.getRoot(), "test.properties"); + } + + private Properties getStoredProperties() throws FileNotFoundException, IOException { + Properties properties = new Properties(); + properties.load(new FileInputStream(getPropertiesFileLocation())); + return properties; + } +} diff --git 
a/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties new file mode 100644 index 0000000000..3c79fe6cb2 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties @@ -0,0 +1 @@ +includedPropertyKey=irrelevantValue \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml b/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml new file mode 100644 index 0000000000..03188dc533 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml @@ -0,0 +1,281 @@ + + + dhp-build-properties-maven-plugin + + eu.dnetlib.dhp + dhp-build-properties-maven-plugin + 1.0.0-SNAPSHOT + dhp-build-properties + false + true + + + generate-properties + Generates oozie properties which were not provided from commandline. + false + true + false + false + false + true + eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo + java + per-lookup + once-per-session + false + + + + write-project-properties + Writes project properties for the keys listed in specified properties files. +Based on: +http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html + false + true + false + false + false + true + eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties + java + per-lookup + once-per-session + false + + + properties.escapeChars + java.lang.String + false + true + Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, +tab=tab. Any other values are taken literally. + + + properties.exclude + java.lang.String + false + true + Comma separated set of properties to exclude when writing the properties file + + + properties.include + java.lang.String + false + true + Comma separated set of properties to write to the properties file. If provided, only the properties matching +those supplied here will be written to the properties file. + + + properties.includeEnvironmentVariables + boolean + false + true + If true, the plugin will include environment variables when writing the properties file. Environment variables +are prefixed with "env". Environment variables override project properties. + + + properties.includePropertyKeysFromFiles + java.lang.String[] + false + true + + + + properties.includeSystemProperties + boolean + false + true + If true, the plugin will include system properties when writing the properties file. System properties override +both environment variables and project properties. 
+ + + properties.outputFile + java.io.File + true + true + The file that properties will be written to + + + project + org.apache.maven.project.MavenProject + true + false + + + + properties.quiet + boolean + false + true + If true, the plugin will silently ignore any non-existent properties files, and the build will continue + + + + + + + + + + + + + + + org.apache.maven + maven-plugin-api + jar + 2.0 + + + org.apache.maven + maven-project + jar + 2.0 + + + org.apache.maven + maven-profile + jar + 2.0 + + + org.apache.maven + maven-model + jar + 2.0 + + + org.apache.maven + maven-artifact-manager + jar + 2.0 + + + org.apache.maven + maven-repository-metadata + jar + 2.0 + + + org.apache.maven.wagon + wagon-provider-api + jar + 1.0-alpha-5 + + + org.codehaus.plexus + plexus-utils + jar + 1.0.4 + + + org.apache.maven + maven-artifact + jar + 2.0 + + + org.codehaus.plexus + plexus-container-default + jar + 1.0-alpha-8 + + + classworlds + classworlds + jar + 1.1-alpha-2 + + + org.kuali.maven.plugins + properties-maven-plugin + jar + 1.3.2 + + + org.springframework + spring-core + jar + 3.1.1.RELEASE + + + org.springframework + spring-asm + jar + 3.1.1.RELEASE + + + org.jasypt + jasypt + jar + 1.9.0 + + + org.kuali.maven.common + maven-kuali-common + jar + 1.2.8 + + + org.apache.ant + ant + jar + 1.8.2 + + + org.apache.ant + ant-launcher + jar + 1.8.2 + + + org.codehaus.plexus + plexus-interpolation + jar + 1.15 + + + commons-lang + commons-lang + jar + 2.6 + + + commons-io + commons-io + jar + 2.5 + + + org.slf4j + jcl-over-slf4j + jar + 1.6.4 + + + org.slf4j + slf4j-api + jar + 1.7.22 + + + org.slf4j + slf4j-log4j12 + jar + 1.7.22 + + + log4j + log4j + jar + 1.2.17 + + + javax.servlet + javax.servlet-api + jar + 3.1.0 + + + \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class new file mode 100644 index 0000000000..3eeb323f7b Binary files /dev/null and b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class differ diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class new file mode 100644 index 0000000000..e09929deab Binary files /dev/null and b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class differ diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000000..5c141f830e --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,2 @@ +eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class +eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst 
b/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000000..cac9348aa5 --- /dev/null +++ b/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,2 @@ +/Users/claudio/workspace/dnet-hadoop/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +/Users/claudio/workspace/dnet-hadoop/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml new file mode 100644 index 0000000000..a930af4eab --- /dev/null +++ b/dhp-build/pom.xml @@ -0,0 +1,16 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + dhp-build + pom + + dhp-build-assembly-resources + dhp-build-properties-maven-plugin + + + diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml new file mode 100644 index 0000000000..42b8864fa6 --- /dev/null +++ b/dhp-common/pom.xml @@ -0,0 +1,177 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + + dhp-common + jar + + + + + ${project.groupId} + dhp-schemas + ${project.version} + + + + org.apache.oozie + oozie-core + provided + + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + + org.apache.hadoop + hadoop-common + + + + org.apache.spark + spark-core_2.10 + + + + org.apache.spark + spark-sql_2.10 + + + + org.apache.avro + avro + + + + org.apache.avro + avro-mapred + hadoop2 + + + + org.apache.commons + commons-lang3 + + + + + org.springframework + spring-beans + + + + com.beust + jcommander + + + + org.apache.pig + pig + + + + com.linkedin.datafu + datafu + + + + commons-beanutils + commons-beanutils + + + + commons-io + commons-io + + + + org.jdom + jdom + + + + + + + + net.alchim31.maven + scala-maven-plugin + + + + + org.apache.avro + avro-maven-plugin + + + generate-test-sources + + schema + idl-protocol + + + String + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-test-sources + generate-test-sources + + add-test-source + + + + ${project.build.directory}/generated-test-sources/avro/ + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + eu.dnetlib.iis.common.IntegrationTest + + + + + org.apache.maven.plugins + maven-failsafe-plugin + + + + + + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java new file mode 100644 index 0000000000..7fbcd8fef8 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java @@ -0,0 +1,106 @@ +package eu.dnetlib.dhp.common; + +import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.LinkedList; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsShell; +import org.springframework.beans.BeanUtils; +import org.springframework.util.ClassUtils; +import org.springframework.util.ReflectionUtils; + +/** + * Extracted from: + * https://github.com/spring-projects/spring-hadoop/blob/master/spring-hadoop-core/src/main/java/org/springframework/data/hadoop/fs/FsShellPermissions.java + * + * Utility class for accessing Hadoop FsShellPermissions (which is not public) + * without having to duplicate its code. 
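A usage sketch may be clearer than the reflective internals that follow. The call below is a hypothetical example (the path is made up) and is the programmatic equivalent of `hadoop fs -chmod -R 755 /user/someuser/working_dir`; note that for `CHMOD` the `group` argument carries the permission mode string.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import eu.dnetlib.dhp.common.FsShellPermissions;

// Hypothetical usage: recursively chmod an HDFS working directory.
public class ChmodDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FsShellPermissions.changePermissions(fs, conf,
                FsShellPermissions.Op.CHMOD, true, "755", "/user/someuser/working_dir");
    }
}
```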
+ * @author Costin Leau + * + */ +public class FsShellPermissions { + + private static boolean IS_HADOOP_20X = ClassUtils.isPresent("org.apache.hadoop.fs.FsShellPermissions$Chmod", + FsShellPermissions.class.getClassLoader()); + + public enum Op { + CHOWN("-chown"), CHMOD("-chmod"), CHGRP("-chgrp"); + + private final String cmd; + + Op(String cmd) { + this.cmd = cmd; + } + + public String getCmd() { + return cmd; + } + } + + // TODO: move this into Spring Core (but add JDK 1.5 compatibility first) + @SafeVarargs + static T[] concatAll(T[] first, T[]... rest) { + // can add some sanity checks + int totalLength = first.length; + for (T[] array : rest) { + totalLength += array.length; + } + T[] result = Arrays.copyOf(first, totalLength); + int offset = first.length; + for (T[] array : rest) { + System.arraycopy(array, 0, result, offset, array.length); + offset += array.length; + } + return result; + } + + public static void changePermissions(FileSystem fs, Configuration config, + Op op, boolean recursive, String group, String uri) { + changePermissions(fs, config, op, recursive, group, new String[] {uri}); + } + + public static void changePermissions(FileSystem fs, Configuration config, + Op op, boolean recursive, String group, String... uris) { + String[] argvs; + if (recursive) { + argvs = new String[1]; + argvs[0] = "-R"; + } else { + argvs = new String[0]; + } + argvs = concatAll(argvs, new String[] { group }, uris); + + // Hadoop 1.0.x + if (!IS_HADOOP_20X) { + Class cls = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions", config.getClass().getClassLoader()); + Object[] args = new Object[] { fs, op.getCmd(), argvs, 0, new FsShell(config) }; + + Method m = ReflectionUtils.findMethod(cls, "changePermissions", FileSystem.class, String.class, String[].class, int.class, FsShell.class); + ReflectionUtils.makeAccessible(m); + ReflectionUtils.invokeMethod(m, null, args); + } + // Hadoop 2.x + else { + Class cmd = ClassUtils.resolveClassName("org.apache.hadoop.fs.shell.Command", config.getClass().getClassLoader()); + Class targetClz = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions$Chmod", config.getClass().getClassLoader()); + Configurable target = (Configurable) BeanUtils.instantiate(targetClz); + target.setConf(config); + // run(String...) swallows the exceptions - re-implement it here + // + LinkedList args = new LinkedList(Arrays.asList(argvs)); + try { + Method m = ReflectionUtils.findMethod(cmd, "processOptions", LinkedList.class); + ReflectionUtils.makeAccessible(m); + ReflectionUtils.invokeMethod(m, target, args); + m = ReflectionUtils.findMethod(cmd, "processRawArguments", LinkedList.class); + ReflectionUtils.makeAccessible(m); + ReflectionUtils.invokeMethod(m, target, args); + } catch (IllegalStateException ex){ + throw new RuntimeException("Cannot change permissions/ownership " + ex); + } + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java new file mode 100644 index 0000000000..1ce7cd4266 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java @@ -0,0 +1,75 @@ +package eu.dnetlib.dhp.common; + +import java.io.UnsupportedEncodingException; + +/** + * InfoSpaceConstants constants. 
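To give the row-prefix and separator constants below some context, here is one plausible composition of an InfoSpace row key; the exact composition rule is an assumption here, and the namespace and local id are hypothetical.

```java
import eu.dnetlib.dhp.common.InfoSpaceConstants;

// Illustrative only: composing a result row key from the constants below.
public class RowKeyDemo {
    public static void main(String[] args) {
        String rowKey = InfoSpaceConstants.ROW_PREFIX_RESULT        // "50|" marks a result row
                + "datacite"                                        // hypothetical namespace
                + InfoSpaceConstants.ID_NAMESPACE_SEPARATOR         // "::"
                + "someLocalId";                                    // hypothetical local id
        System.out.println(rowKey); // 50|datacite::someLocalId
    }
}
```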
+ * + * @author mhorst + * + */ +public final class InfoSpaceConstants { + + public static final float CONFIDENCE_TO_TRUST_LEVEL_FACTOR = 0.9f; + + public static final String ENCODING_UTF8 = "utf-8"; + + public static final char ROW_PREFIX_SEPARATOR = '|'; + + public static final String ID_NAMESPACE_SEPARATOR = "::"; + public static final String CLASSIFICATION_HIERARCHY_SEPARATOR = ID_NAMESPACE_SEPARATOR; + public static final String INFERENCE_PROVENANCE_SEPARATOR = ID_NAMESPACE_SEPARATOR; + + public static final String ROW_PREFIX_RESULT = "50|"; + public static final String ROW_PREFIX_PROJECT = "40|"; + public static final String ROW_PREFIX_PERSON = "30|"; + public static final String ROW_PREFIX_ORGANIZATION = "20|"; + public static final String ROW_PREFIX_DATASOURCE = "10|"; + + public static final String QUALIFIER_BODY_STRING = "body"; + public static final byte[] QUALIFIER_BODY; + + public static final String SEMANTIC_CLASS_MAIN_TITLE = "main title"; + public static final String SEMANTIC_CLASS_PUBLICATION = "publication"; + public static final String SEMANTIC_CLASS_UNKNOWN = "UNKNOWN"; + + public static final String SEMANTIC_SCHEME_DNET_PERSON_ROLES = "dnet:personroles"; + public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT = "dnet:result_result_relations"; + public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_PROJECT = "dnet:result_project_relations"; + + public static final String SEMANTIC_SCHEME_DNET_TITLE = "dnet:dataCite_title"; + public static final String SEMANTIC_SCHEME_DNET_TITLE_TYPOLOGIES = "dnet:title_typologies"; + public static final String SEMANTIC_SCHEME_DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; + public static final String SEMANTIC_SCHEME_DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; + public static final String SEMANTIC_SCHEME_DNET_LANGUAGES = "dnet:languages"; + public static final String SEMANTIC_SCHEME_DNET_PID_TYPES = "dnet:pid_types"; + public static final String SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES = "dnet:subject_classification_typologies"; + + // resultResult citation and similarity related + public static final String SEMANTIC_SCHEME_DNET_DATASET_PUBLICATION_RELS = "dnet:dataset_publication_rels"; + + public static final String SEMANTIC_CLASS_TAXONOMIES_ARXIV = "arxiv"; + public static final String SEMANTIC_CLASS_TAXONOMIES_WOS = "wos"; + public static final String SEMANTIC_CLASS_TAXONOMIES_DDC = "ddc"; + public static final String SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC = "mesheuropmc"; + public static final String SEMANTIC_CLASS_TAXONOMIES_ACM = "acm"; + + public static final String EXTERNAL_ID_TYPE_INSTANCE_URL = "dnet:instance-url"; + public static final String EXTERNAL_ID_TYPE_UNKNOWN = "unknown"; + + // publication types class ids + public static final String SEMANTIC_CLASS_INSTANCE_TYPE_ARTICLE = "0001"; + public static final String SEMANTIC_CLASS_INSTANCE_TYPE_DATASET = "0021"; + + static { + try { + QUALIFIER_BODY = QUALIFIER_BODY_STRING.getBytes(ENCODING_UTF8); + + } catch (UnsupportedEncodingException e) { + throw new RuntimeException(e); + } + } + + private InfoSpaceConstants() { + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java new file mode 100644 index 0000000000..e71d69027b --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java @@ -0,0 +1,74 @@ +package eu.dnetlib.dhp.common; + +import java.util.Map; + +import 
org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; + +/** + * Utility class holding parameter names and method simplifying access to parameters from hadoop context. + * @author mhorst + * + */ +public final class WorkflowRuntimeParameters { + + public static final String OOZIE_ACTION_OUTPUT_FILENAME = "oozie.action.output.properties"; + + public static final char DEFAULT_CSV_DELIMITER = ','; + + public static final String UNDEFINED_NONEMPTY_VALUE = "$UNDEFINED$"; + + // default values + public static final String DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE = "60000"; + public static final String DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; + // parameter names + public static final String DNET_SERVICE_CLIENT_READ_TIMEOUT = "dnet.service.client.read.timeout"; + public static final String DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT = "dnet.service.client.connection.timeout"; + + // ----------------- CONSTRUCTORS ----------------------------- + + private WorkflowRuntimeParameters() {} + + /** + * Retrieves parameter from hadoop context configuration when set to value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. + */ + public static String getParamValue(String paramName, Configuration configuration) { + String paramValue = configuration.get(paramName); + if (StringUtils.isNotBlank(paramValue) && !UNDEFINED_NONEMPTY_VALUE.equals(paramValue)) { + return paramValue; + } else { + return null; + } + } + + /** + * Retrieves {@link Integer} parameter from hadoop context configuration when set to non-empty value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. + * Null is returned when parameter was not set. + * @throws {@link NumberFormatException} if parameter value does not contain a parsable integer + */ + public static Integer getIntegerParamValue(String paramName, Configuration configuration) throws NumberFormatException { + String paramValue = getParamValue(paramName, configuration); + return paramValue!=null?Integer.valueOf(paramValue):null; + } + + /** + * Retrieves parameter from hadoop context configuration when set to value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. + * If requested parameter was not set, fallback parameter is retrieved using the same logic. + */ + public static String getParamValue(String paramName, String fallbackParamName, Configuration configuration) { + String resultCandidate = getParamValue(paramName, configuration); + return resultCandidate!=null?resultCandidate:getParamValue(fallbackParamName, configuration); + } + + /** + * Provides parameter value. Returns default value when entry not found among parameters. 
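+ * + * A usage sketch (the timeout value is illustrative):
+ * <pre>{@code
+ * Map<String, String> params = new HashMap<String, String>();
+ * params.put(DNET_SERVICE_CLIENT_READ_TIMEOUT, "30000");
+ * // returns "30000"; returns DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE when the key is absent
+ * String timeout = getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT,
+ *         DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, params);
+ * }</pre>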
+ * + * @param paramName parameter name + * @param defaultValue parameter default value to be returned when entry not found among parameters + * @param parameters map of parameters + */ + public static String getParamValue(String paramName, String defaultValue, Map parameters) { + return parameters.containsKey(paramName)?parameters.get(paramName):defaultValue; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java new file mode 100644 index 0000000000..5b37d31f17 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java @@ -0,0 +1,111 @@ +package eu.dnetlib.dhp.common.counter; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Map; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +/** + * Class that groups several counters which are identified by name (String value). + * + * @author madryk + */ +public class NamedCounters implements Serializable { + + private static final long serialVersionUID = 1L; + + + private final Map counters; + + + //------------------------ CONSTRUCTORS -------------------------- + + /** + * Creates {@link NamedCounters} with empty initial counters. + */ + public NamedCounters() { + this.counters = Maps.newHashMap(); + } + + /** + * Creates {@link NamedCounters} with initial counters.
+ * Starting value of initial counters is zero. + * + * @param initialCounterNames - names of initial counters + */ + public NamedCounters(String[] initialCounterNames) { + Preconditions.checkNotNull(initialCounterNames); + + this.counters = Maps.newHashMap(); + + for (String initialCounterName : initialCounterNames) { + this.counters.put(initialCounterName, 0L); + } + } + + /** + * Creates {@link NamedCounters} with initial counters.
+ * Starting value of initial counters is zero. + * + * @param initialCounterNamesEnumClass - enum class providing names of initial counters + */ + public > NamedCounters(Class initialCounterNamesEnumClass) { + Preconditions.checkNotNull(initialCounterNamesEnumClass); + + this.counters = Maps.newHashMap(); + Enum[] enumConstants = initialCounterNamesEnumClass.getEnumConstants(); + + for (int i=0; i + * Internally uses {@link #increment(String, Long)} + */ + public void increment(String counterName) { + increment(counterName, 1L); + } + + /** + * Increments value of a counter with the name specified as parameter by the given value.
+ * If current instance of {@link NamedCounters} does not contain counter + * with provided name, then before incrementing counter will be created with starting + * value equal to zero. + */ + public void increment(String counterName, Long incrementValue) { + + long oldValue = counters.getOrDefault(counterName, 0L); + counters.put(counterName, oldValue + incrementValue); + } + + /** + * Returns current value of a counter with the name specified as parameter. + * + * @throws IllegalArgumentException when {@link NamedCounters} does not contain counter + * with provided name + */ + public long currentValue(String counterName) { + + if (!counters.containsKey(counterName)) { + throw new IllegalArgumentException("Couldn't find counter with name: " + counterName); + } + + return counters.get(counterName); + } + + /** + * Returns names of currently tracked counters. + */ + public Collection counterNames() { + return counters.keySet(); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java new file mode 100644 index 0000000000..6686432dd7 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java @@ -0,0 +1,48 @@ +package eu.dnetlib.dhp.common.counter; + +import org.apache.spark.AccumulableParam; + +import scala.Tuple2; + +/** + * Spark {@link AccumulableParam} for tracking multiple counter values using {@link NamedCounters}. + * + * @author madryk + */ +public class NamedCountersAccumulableParam implements AccumulableParam> { + + private static final long serialVersionUID = 1L; + + + //------------------------ LOGIC -------------------------- + + /** + * Increments {@link NamedCounters} counter with the name same as the first element of passed incrementValue tuple + * by value defined in the second element of incrementValue tuple. + */ + @Override + public NamedCounters addAccumulator(NamedCounters counters, Tuple2 incrementValue) { + counters.increment(incrementValue._1, incrementValue._2); + return counters; + } + + /** + * Merges two passed {@link NamedCounters}. + */ + @Override + public NamedCounters addInPlace(NamedCounters counters1, NamedCounters counters2) { + for (String counterName2 : counters2.counterNames()) { + counters1.increment(counterName2, counters2.currentValue(counterName2)); + } + return counters1; + } + + /** + * Returns passed initialCounters value without any modifications. + */ + @Override + public NamedCounters zero(NamedCounters initialCounters) { + return initialCounters; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java new file mode 100644 index 0000000000..bebb82b6e0 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.common.counter; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.util.Properties; + +/** + * Writer of {@link NamedCounters} object into a properties file. + * + * @author madryk + */ +public class NamedCountersFileWriter { + + + //------------------------ LOGIC -------------------------- + + /** + * Writes {@link NamedCounters} as a properties file located under + * provided filePath. 
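+ * + * A usage sketch (counter name and target path are illustrative):
+ * <pre>{@code
+ * NamedCounters counters = new NamedCounters(new String[] {"exported"});
+ * counters.increment("exported");
+ * new NamedCountersFileWriter().writeCounters(counters, "/tmp/counters.properties");
+ * }</pre>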
+ * + * @throws IOException if writing to properties file resulted in an error + */ + public void writeCounters(NamedCounters counters, String filePath) throws IOException { + + Properties counterProperties = buildPropertiesFromCounters(counters); + + File file = new File(filePath); + try (OutputStream os = new FileOutputStream(file)) { + + counterProperties.store(os, null); + + } + + } + + + //------------------------ PRIVATE -------------------------- + + private Properties buildPropertiesFromCounters(NamedCounters counters) { + + Properties properties = new Properties(); + + for (String counterName : counters.counterNames()) { + long count = counters.currentValue(counterName); + properties.put(counterName, String.valueOf(count)); + } + + return properties; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java new file mode 100644 index 0000000000..bcc6494b2a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java @@ -0,0 +1,67 @@ +package eu.dnetlib.dhp.common.fault; + +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import eu.dnetlib.dhp.audit.schemas.Cause; +import eu.dnetlib.dhp.audit.schemas.Fault; + +/** + * {@link Fault} related utilities. + * @author mhorst + * + */ +public final class FaultUtils { + + // ---------------------- CONSTRUCTORS ------------------- + + private FaultUtils() {} + + // ---------------------- LOGIC -------------------------- + + /** + * Generates {@link Fault} instance based on {@link Throwable}. + * @param entityId entity identifier + * @param throwable + * @param auditSupplementaryData + * @return {@link Fault} instance generated for {@link Throwable} + */ + public static Fault exceptionToFault(CharSequence entityId, Throwable throwable, + Map auditSupplementaryData) { + Fault.Builder faultBuilder = Fault.newBuilder(); + faultBuilder.setInputObjectId(entityId); + faultBuilder.setTimestamp(System.currentTimeMillis()); + faultBuilder.setCode(throwable.getClass().getName()); + faultBuilder.setMessage(throwable.getMessage()); + StringWriter strWriter = new StringWriter(); + PrintWriter pw = new PrintWriter(strWriter); + throwable.printStackTrace(pw); + pw.close(); + faultBuilder.setStackTrace(strWriter.toString()); + if (throwable.getCause()!=null) { + faultBuilder.setCauses(appendThrowableToCauses( + throwable.getCause(), new ArrayList())); + } + if (auditSupplementaryData!=null && !auditSupplementaryData.isEmpty()) { + faultBuilder.setSupplementaryData(auditSupplementaryData); + } + return faultBuilder.build(); + } + + protected static List appendThrowableToCauses(Throwable e, List causes) { + Cause.Builder causeBuilder = Cause.newBuilder(); + causeBuilder.setCode(e.getClass().getName()); + causeBuilder.setMessage(e.getMessage()); + causes.add(causeBuilder.build()); + if (e.getCause()!=null) { + return appendThrowableToCauses( + e.getCause(),causes); + } else { + return causes; + } + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java new file mode 100644 index 0000000000..b6106044b7 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java @@ -0,0 +1,98 @@ +package eu.dnetlib.dhp.common.java; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; 
+import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.lang3.StringUtils; + +/** + * + * @author Mateusz Kobos + * + */ +@SuppressWarnings("deprecation") +public final class CmdLineParser { + /** HACK: make the names of various types of parameters of the program + * more readable, e.g. "--Input_person=..." instead of "-Iperson=...", + * "--Output_merged=..." instead of "-Omerged=...". I wasn't able to + * get such notation so far using the Apache CLI. */ + public static final String constructorPrefix = "C"; + public static final String inputPrefix = "I"; + public static final String outputPrefix = "O"; + public static final String specialParametersPrefix = "S"; + /** HACK: This field should be removed since this list of special + * parameters is empty, thus not used anywhere.*/ + public static final String[] mandatorySpecialParameters = new String[]{}; + public static final String processParametersPrefix = "P"; + + // ------------------------- CONSTRUCTORS ------------------------------ + + private CmdLineParser() {} + + // ------------------------- LOGIC ------------------------------------- + + public static CommandLine parse(String[] args) { + Options options = new Options(); + @SuppressWarnings("static-access") + Option constructorParams = OptionBuilder.withArgName("STRING") + .hasArg() + .withDescription("Constructor parameter") + .withLongOpt("ConstructorParam") + .create(constructorPrefix); + options.addOption(constructorParams); + @SuppressWarnings("static-access") + Option inputs = OptionBuilder.withArgName("portName=URI") + .hasArgs(2) + .withValueSeparator() + .withDescription("Path binding for a given input port") + .withLongOpt("Input") + .create(inputPrefix); + options.addOption(inputs); + @SuppressWarnings("static-access") + Option outputs = OptionBuilder.withArgName("portName=URI") + .hasArgs(2) + .withValueSeparator() + .withDescription("Path binding for a given output port") + .create(outputPrefix); + options.addOption(outputs); + @SuppressWarnings("static-access") + Option specialParameter = OptionBuilder.withArgName("parameter_name=string") + .hasArgs(2) + .withValueSeparator() + .withDescription(String.format("Value of special parameter. 
" + + "These are the mandatory parameters={%s}", + StringUtils.join(mandatorySpecialParameters, ","))) + .create(specialParametersPrefix); + options.addOption(specialParameter); + @SuppressWarnings("static-access") + Option otherParameter = OptionBuilder.withArgName("parameter_name=string") + .hasArgs(2) + .withValueSeparator() + .withDescription( + String.format("Value of some other parameter.")) + .create(processParametersPrefix); + options.addOption(otherParameter); + + Option help = new Option("help", "print this message"); + options.addOption(help); + + CommandLineParser parser = new GnuParser(); + try { + CommandLine cmdLine = parser.parse(options, args); + if(cmdLine.hasOption("help")){ + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("", options ); + System.exit(1); + } + return cmdLine; + } catch (ParseException e) { + throw new CmdLineParserException("Parsing command line arguments failed", e); + } + + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java new file mode 100644 index 0000000000..bbcad8d84a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java @@ -0,0 +1,21 @@ +package eu.dnetlib.dhp.common.java; + +/** + * Command line parsing exception + * @author Mateusz Kobos + * + */ +public class CmdLineParserException extends RuntimeException { + /** + * + */ + private static final long serialVersionUID = 9219928547611876284L; + + public CmdLineParserException(String message){ + super(message); + } + + public CmdLineParserException(String message, Throwable cause){ + super(message, cause); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java new file mode 100644 index 0000000000..2c65d08926 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.common.java; + +import java.lang.reflect.Constructor; + +import org.apache.commons.cli.CommandLine; + +/** + * Handles parsing the command line arguments provided by the Oozie + * to create a {@link Process} + * @author Mateusz Kobos + * + */ +public class CmdLineParserForProcessConstruction { + public Process run(CommandLine cmdLine){ + String[] args = cmdLine.getArgs(); + if(args.length != 1){ + throw new CmdLineParserException("The name of the class has "+ + "to be specified as the first agrument"); + } + String className = args[0]; + + String[] constructorParams = cmdLine.getOptionValues( + CmdLineParser.constructorPrefix); + if(constructorParams == null){ + constructorParams = new String[0]; + } + try { + Class processClass = Class.forName(className); + Constructor processConstructor = null; + if(constructorParams.length == 0){ + try{ + processConstructor = processClass.getConstructor(); + return (Process) processConstructor.newInstance(); + } catch(NoSuchMethodException ex){ + } + } + processConstructor = processClass.getConstructor(String[].class); + return (Process) processConstructor.newInstance( + (Object)constructorParams); + } catch (Exception e) { + throw new CmdLineParserException(String.format( + "Problem while creating class \"%s\"", className), e); + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java new file mode 100644 index 0000000000..31db331032 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java @@ -0,0 +1,100 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; +import java.util.Set; + +import org.apache.commons.cli.CommandLine; +import org.apache.hadoop.fs.Path; + +/** + * Handles parsing parameters passed to the {@link Process} + * @author Mateusz Kobos + * + */ +public class CmdLineParserForProcessRunParameters { + /** Parse the command line arguments. + * + * @param cmdLine command line arguments + * @param ports names of ports that ought to be extracted from command line + */ + public ProcessParameters run(CommandLine cmdLine, Ports ports) { + + Properties inputProperties = cmdLine.getOptionProperties( + CmdLineParser.inputPrefix); + assumePortNamesMatch(CmdLineParser.inputPrefix, inputProperties, + ports.getInput().keySet()); + Map inputBindings = getBindings( + inputProperties, ports.getInput().keySet()); + + Properties outputProperties = cmdLine.getOptionProperties( + CmdLineParser.outputPrefix); + assumePortNamesMatch(CmdLineParser.outputPrefix, outputProperties, + ports.getOutput().keySet()); + Map outputBindings = getBindings( + outputProperties, ports.getOutput().keySet()); + + PortBindings bindings = new PortBindings(inputBindings, outputBindings); + + Properties specialProperties = cmdLine.getOptionProperties( + CmdLineParser.specialParametersPrefix); + assumeContainAllMandatoryParameters( + specialProperties, CmdLineParser.mandatorySpecialParameters); + + Properties rawProperties = cmdLine.getOptionProperties( + CmdLineParser.processParametersPrefix); + Map processParameters = new HashMap(); + for(Entry entry: rawProperties.entrySet()){ + processParameters.put( + (String)entry.getKey(), (String)entry.getValue()); + } + + return new ProcessParameters(bindings, processParameters); + } + + private static void assumeContainAllMandatoryParameters( + Properties properties, String[] mandatoryParameters){ + for(String otherParameter: mandatoryParameters){ + if(!properties.containsKey(otherParameter)){ + throw new CmdLineParserException(String.format( + "Not all mandatory properties are set using the \"%s\" " + + "option are given, e.g. 
\"-%s\" parameter is missing", + CmdLineParser.specialParametersPrefix, otherParameter)); + } + } + } + + private static void assumePortNamesMatch(String cmdLineParamPrefix, + Properties cmdLineProperties, Set portNames) { + for (String name : portNames) { + if (!cmdLineProperties.containsKey(name)) { + throw new CmdLineParserException(String.format( + "The port with name \"%s\" is not specified in " + + "command line (command line option \"-%s\" is missing)", + name, cmdLineParamPrefix + name)); + } + } + for (Object cmdLineKeyObject : cmdLineProperties.keySet()) { + String name = (String) cmdLineKeyObject; + if (!portNames.contains(name)) { + throw new CmdLineParserException(String.format( + "A port name \"%s\" which is not specified is given " + + "in the command line " + + "(command line option \"%s\" is excess)", + name, cmdLineParamPrefix + name)); + } + } + } + + private static Map getBindings( + Properties cmdLineProperties, Set portNames) { + Map bindings = new HashMap(); + for (String name : portNames) { + Path path = new Path((String) cmdLineProperties.get(name)); + bindings.put(name, path); + } + return bindings; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java new file mode 100644 index 0000000000..7920026891 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.fs.Path; + +/** + * Port names (see {@link Ports}) bound to certain paths in the file system + * @author Mateusz Kobos + * + */ +public class PortBindings { + private final Map input; + private final Map output; + + public PortBindings(Map input, Map output) { + this.input = input; + this.output = output; + } + + public Map getInput() { + return input; + } + + public Map getOutput() { + return output; + } + + @Override + public boolean equals(Object o){ + if(!(o instanceof PortBindings)){ + return false; + } + PortBindings other = (PortBindings) o; + return input.equals(other.input) && output.equals(other.output); + } + + @Override + public int hashCode(){ + throw new NotImplementedException(); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java new file mode 100644 index 0000000000..165f250610 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java @@ -0,0 +1,27 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * A class that groups information about input and output ports, i.e. + * their (name of the port -> type of the port) mappings. 
+ * @author Mateusz Kobos + */ +public class Ports { + private final Map input; + private final Map output; + + public Ports(Map input, Map output){ + this.input = input; + this.output = output; + } + + public Map getInput() { + return input; + } + public Map getOutput() { + return output; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java new file mode 100644 index 0000000000..77e7b617a8 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** Workflow node written in Java. + * + * The implementing class has to define a constructor with no parameters + * (possibly the default one) or a constructor with String[] as a single + * parameter. + * @author Mateusz Kobos + */ +public interface Process { + /** + * Run the process. + * + * The process ends with a success status if no exception is thrown, + * otherwise it ends with an error status. + * + * @param parameters parameters of the process. Each parameter + * corresponds to a single entry in the map, its name is the key, its + * value is the value. + * @throws Exception if thrown, it means that the process finished + * with an error status + */ + void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception; + + /** + * @return map containing as the key: name of the port, as the value: type + * of the port + */ + Map getInputPorts(); + + /** + * @return map containing as the key: name of the port, as the value: type + * of the port + */ + Map getOutputPorts(); +} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java new file mode 100644 index 0000000000..9d8d827799 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.common.java; + +/** + * Process exception + * @author Dominika Tkaczyk + * + */ +public class ProcessException extends RuntimeException { + + private static final long serialVersionUID = 2758953138374438377L; + + public ProcessException(String message){ + super(message); + } + + public ProcessException(String message, Throwable cause){ + super(message, cause); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java new file mode 100644 index 0000000000..33902dc200 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.commons.lang.NotImplementedException; + +/** + * Parameters of the Process retrieved from Oozie + * @author Mateusz Kobos + * + */ +public class ProcessParameters { + private final PortBindings portBindings; + private final Map parameters; + + public PortBindings getPortBindings() { + return portBindings; + } + + public Map getParameters(){ + return parameters; + } + + public ProcessParameters(PortBindings portBindings, + Map parameters) { + this.portBindings = portBindings; + this.parameters = parameters; + } + + @Override + public boolean equals(Object o){ + if(!(o instanceof ProcessParameters)){ + 
return false; + } + ProcessParameters other = (ProcessParameters) o; + return this.portBindings.equals(other.portBindings); + } + + @Override + public int hashCode(){ + throw new NotImplementedException(); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java new file mode 100644 index 0000000000..084a521e64 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java @@ -0,0 +1,43 @@ +package eu.dnetlib.dhp.common.java; + +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +/** + * {@link Process} related utility class. + * @author mhorst + * + */ +public final class ProcessUtils { + + // ------------- CONSTRUCTORS ---------------- + + private ProcessUtils() {} + + // ------------- LOGIC ----------------------- + + /** + * Returns parameter value retrieved from parameters or context, with parameters taking precedence. + * @param paramName parameter name + * @param hadoopConf hadoop configuration acting as a fallback source + * @param parameters map of parameters + * @return parameter value, or null when not found + */ + public static String getParameterValue(String paramName, + Configuration hadoopConf, + Map<String, String> parameters) { + if (parameters!=null && !parameters.isEmpty()) { + String result = parameters.get(paramName); + if (result!=null) { + return result; + } + } + if (hadoopConf!=null) { + return hadoopConf.get(paramName); + } else { + return null; + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java new file mode 100644 index 0000000000..d60eb0cd9c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java @@ -0,0 +1,88 @@ +package eu.dnetlib.dhp.common.java; + +import java.io.IOException; +import java.util.Map; + +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericContainer; +import org.apache.commons.cli.CommandLine; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; + +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * Creates {@link Process} object through reflection by parsing + * the command-line arguments + * @author Mateusz Kobos + * + */ +public class ProcessWrapper { + + public Configuration getConfiguration() throws Exception{ + return new Configuration(); + } + + public static void main(String[] args) throws Exception { + ProcessWrapper wrapper = new ProcessWrapper(); + wrapper.run(args); + } + + public void run(String[] args) throws Exception{ + CommandLine cmdLine = CmdLineParser.parse(args); + + CmdLineParserForProcessConstruction constructionParser = + new CmdLineParserForProcessConstruction(); + Process process = constructionParser.run(cmdLine); + Ports ports = + new Ports(process.getInputPorts(), process.getOutputPorts()); + CmdLineParserForProcessRunParameters runParametersParser = + new CmdLineParserForProcessRunParameters(); + ProcessParameters params = runParametersParser.run(cmdLine, ports); + Configuration conf = getConfiguration(); + process.run(params.getPortBindings(), conf, params.getParameters()); + createOutputsIfDontExist( + process.getOutputPorts(), params.getPortBindings().getOutput(), + conf); + } + + private
static void createOutputsIfDontExist( + Map outputPortsSpecification, + Map outputPortBindings, Configuration conf) throws IOException{ + FileSystem fs = FileSystem.get(conf); + for(Map.Entry entry: outputPortBindings.entrySet()){ + Path path = entry.getValue(); + if(!fs.exists(path) || isEmptyDirectory(fs, path)){ + PortType rawType = outputPortsSpecification.get(entry.getKey()); + if(!(rawType instanceof AvroPortType)){ + throw new RuntimeException("The port \""+entry.getKey()+ + "\" is not of Avro type and only Avro types are "+ + "supported"); + } + AvroPortType type = (AvroPortType) rawType; + FileSystemPath fsPath = new FileSystemPath(fs, path); + DataFileWriter writer = + DataStore.create(fsPath, type.getSchema()); + writer.close(); + } + } + } + + private static boolean isEmptyDirectory(FileSystem fs, Path path) throws IOException{ + if(!fs.isDirectory(path)){ + return false; + } + RemoteIterator files = fs.listFiles(path, false); + /** There's at least one file, so the directory is not empty */ + if(files.hasNext()){ + return false; + } + return true; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java new file mode 100644 index 0000000000..4d36188548 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java @@ -0,0 +1,156 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.io.IOException; +import java.util.NoSuchElementException; +import java.util.regex.Pattern; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.hadoop.fs.AvroFSInput; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.RemoteIterator; + + + +/** + * An abstraction over data store format which allows + * iterating over records stored in the data store. + * It handles the standard case of a data store that is a directory containing + * many Avro files (but it can also read records from a single file). + * + * @author mhorst + * @author Mateusz Kobos + */ +class AvroDataStoreReader implements CloseableIterator { + + private DataFileReader currentReader; + private RemoteIterator fileIterator; + private final FileSystemPath path; + private final Schema readerSchema; + + /** + * Ignore file starting with underscore. Such files are also ignored by + * default by map-reduce jobs. + */ + private final Pattern whitelistPattern = Pattern.compile("^(?!_).*"); + + /** + * Here the schema used for reading the data store is set to be the same + * as the one that was used to write it. 
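+ * + * A reading sketch (record type and location are illustrative; {@code handle(...)} stands for caller logic):
+ * <pre>{@code
+ * try (CloseableIterator<Fault> it = new AvroDataStoreReader<Fault>(
+ *         new FileSystemPath(fs, new Path("/working_dir/fault")))) {
+ *     while (it.hasNext()) { handle(it.next()); }
+ * }
+ * }</pre>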
+ */ + public AvroDataStoreReader(final FileSystemPath path) + throws IOException { + this(path, null); + } + + /** + * @param path path to the data store to be read + * @param readerSchema the schema onto which the read data store will + * be projected + */ + public AvroDataStoreReader(final FileSystemPath path, Schema readerSchema) + throws IOException { + this.path = path; + this.readerSchema = readerSchema; + fileIterator = path.getFileSystem().listFiles(path.getPath(), false); + currentReader = getNextNonemptyReader(); + } + + private DataFileReader getNextNonemptyReader() throws IOException { + while (fileIterator != null && fileIterator.hasNext()) { + LocatedFileStatus currentFileStatus = fileIterator.next(); + if (isValidFile(currentFileStatus)) { + FileSystemPath currPath = new FileSystemPath( + path.getFileSystem(), currentFileStatus.getPath()); + DataFileReader reader = + getSingleFileReader(currPath, readerSchema); + /** Check if the file contains at least one record */ + if(reader.hasNext()){ + return reader; + } else { + reader.close(); + } + } + } + /** fallback */ + return null; + } + + /** + * Get a reader for the specified Avro file. A utility function. + * @param path path to the existing file + * @param readerSchema optional reader schema. If you want to use the + * default option of using writer schema as the reader schema, pass the + * {@code null} value. + * @throws IOException + */ + private static DataFileReader getSingleFileReader( + FileSystemPath path, Schema readerSchema) throws IOException{ + try{ + SpecificDatumReader datumReader = new SpecificDatumReader(); + if(readerSchema != null){ + datumReader.setExpected(readerSchema); + } + long len = path.getFileSystem().getFileStatus(path.getPath()).getLen(); + FSDataInputStream inputStream = path.getFileSystem().open(path.getPath()); + return new DataFileReader( + new AvroFSInput(inputStream, len), datumReader); + } catch (IOException ex){ + throw new IOException("Problem with file \""+ + path.getPath().toString()+"\": "+ex.getMessage(), ex); + } + } + + /** + * Checks whether file is valid + * + * @param fileStatus + * @return true when valid, false otherwise + */ + private boolean isValidFile(LocatedFileStatus fileStatus) { + if (fileStatus.isFile()) { + return whitelistPattern.matcher( + fileStatus.getPath().getName()).matches(); + } + /** fallback */ + return false; + } + + @Override + public boolean hasNext() { + return currentReader != null; + } + + @Override + public T next(){ + if(currentReader == null){ + throw new NoSuchElementException(); + } + T obj = currentReader.next(); + if(!currentReader.hasNext()){ + try{ + currentReader.close(); + currentReader = getNextNonemptyReader(); + } catch(IOException ex){ + throw new RuntimeException(ex); + } + } + return obj; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public void close() throws IOException { + if(currentReader != null){ + currentReader.close(); + currentReader = null; + } + fileIterator = null; + } +} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java new file mode 100644 index 0000000000..1fc77832e4 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java @@ -0,0 +1,25 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * An iterator for I/O 
operations that can be {@code close}d explicitly to + * release the resources it holds. + * + * You should call {@code close} only when interrupting the iteration in the + * middle since in such situation there is no way for the iterator to know if + * you're going to continue the iteration and it should still hold the resources + * or not. There's no need to call {@code close} when iterating over all + * elements since in such situation it is called automatically after the + * end of iteration. + * + * @author mhorst + * + * @param + */ +public interface CloseableIterator extends Iterator, Closeable { + + +} + diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java new file mode 100644 index 0000000000..a08f975e15 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java @@ -0,0 +1,19 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.util.Iterator; + +/** + * Counting iterator providing total number of results. + * @author mhorst + * + * @param + */ +public interface CountingIterator extends Iterator { + + /** + * Provides total number of results to be iterating on. + * @return total number of results to be iterating on + */ + int getCount(); + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java new file mode 100644 index 0000000000..aa66b6f51c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java @@ -0,0 +1,172 @@ +package eu.dnetlib.dhp.common.java.io; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericContainer; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.SpecificDatumWriter; + + +/** + * Utility for accessing to Avro-based data stores stored in file system + * @author Mateusz Kobos + * + */ +public final class DataStore { + + private final static String singleDataStoreFileName = "content.avro"; + + private static final int FILE_NO_PADDING_LENGTH = 7; + + private DataStore(){} + + /** + * Create a new data store directory with single file and return writer that allows + * adding new records + * @param path path to a directory to be created + * @param schema schema of the records to be stored in the file + * @return + * @throws IOException + */ + public static DataFileWriter create( + FileSystemPath path, Schema schema) throws IOException{ + return create(path, schema, singleDataStoreFileName); + } + + + /** + * Create a new data store directory and return writer that allows + * adding new records + * @param path path to a directory to be created + * @param schema schema of the records to be stored in the file + * @param dataStoreFileName datastore file name + * @return + * @throws IOException + */ + public static DataFileWriter create( + FileSystemPath path, Schema schema, String dataStoreFileName) throws IOException{ + path.getFileSystem().mkdirs(path.getPath()); + FileSystemPath outFile = new FileSystemPath( + path, dataStoreFileName); + return DataStore.createSingleFile(outFile, schema); + } + + /** + * Get reader for reading records from given data store + * + * Here the schema used for reading the data store is set to be the same + * as the one that was used to write it. 
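+ * + * A usage sketch (the path is illustrative):
+ * <pre>{@code
+ * FileSystemPath path = new FileSystemPath(FileSystem.get(conf), new Path("/working_dir/output"));
+ * try (CloseableIterator<Fault> it = DataStore.getReader(path)) {
+ *     while (it.hasNext()) { handle(it.next()); }
+ * }
+ * }</pre>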
+ * + * @see getReader(FileSystemPath path, Schema readerSchema) for details. + * + */ + public static CloseableIterator getReader(FileSystemPath path) + throws IOException{ + return getReader(path, null); + } + + /** + * Get reader for reading records from given data store + * @param path path to a directory corresponding to data store + * @param readerSchema the schema onto which the read data store will + * be projected + */ + public static CloseableIterator getReader( + FileSystemPath path, Schema readerSchema) throws IOException{ + return new AvroDataStoreReader(path, readerSchema); + } + + /** + * Read data store entries and insert them into a list. A utility function. + * + * Here the schema used for reading the data store is set to be the same + * as the one that was used to write it. + */ + public static List read(FileSystemPath path) + throws IOException{ + return read(path, null); + } + + /** + * Read data store entries and insert them into a list. A utility function. + * + * @param readerSchema the schema onto which the read data store will + * be projected + */ + public static List read(FileSystemPath path, Schema readerSchema) + throws IOException{ + CloseableIterator iterator = getReader(path, readerSchema); + List elems = new ArrayList(); + while(iterator.hasNext()){ + elems.add(iterator.next()); + } + return elems; + } + + /** + * Create a data store from a list of entries. A utility function. + * The schema is implicitly + * taken from the first element from the {@code elements} list. + * @param elements list of elements to write. At least one element has + * to be present, because it is used to retrieve schema of the + * structures passed in the list. + */ + public static void create( + List elements, FileSystemPath path) throws IOException{ + if(elements.isEmpty()){ + throw new IllegalArgumentException( + "The list of elements has to be non-empty"); + } + Schema schema = elements.get(0).getSchema(); + create(elements, path, schema); + } + + /** + * Create a data store from a list of entries with schema given explicitly. + * A utility function. + */ + public static void create( + List elements, FileSystemPath path, Schema schema) + throws IOException{ + DataFileWriter writer = create(path, schema); + try{ + for(T i: elements){ + writer.append(i); + } + } finally { + if(writer != null){ + writer.close(); + } + } + } + + /** + * Create a single Avro file. This method shouldn't be normally used to + * create data stores since it creates only a single Avro file, + * while a data store consists of a directory containing one or more files. + */ + public static DataFileWriter createSingleFile( + FileSystemPath path, Schema schema) throws IOException{ + DatumWriter datumWriter = new SpecificDatumWriter(); + DataFileWriter writer = new DataFileWriter(datumWriter); + writer.create(schema, path.getFileSystem().create(path.getPath())); + return writer; + } + + /** + * Generates filename for given file number. + * @param fileNo file sequence number + */ + public static String generateFileName(int fileNo) { + StringBuffer strBuff = new StringBuffer(String.valueOf(fileNo)); + while(strBuff.length() { + + private SequenceFile.Reader sequenceReader; + + private final RemoteIterator fileIt; + + private final FileSystem fs; + + /** + * Ignore file starting with underscore. Such files are also ignored by + * default by map-reduce jobs. 
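+ * For example, "_SUCCESS" and "_logs" are skipped, while "part-00000" is read (illustrative file names).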
+ */ + private final static Pattern WHITELIST_REGEXP = Pattern.compile("^[^_].*"); + + private Text toBeReturned; + + //------------------------ CONSTRUCTORS -------------------------- + + /** + * Default constructor. + * + * @param path HDFS path along with associated FileSystem + * @throws IOException + */ + public SequenceFileTextValueReader(final FileSystemPath path) throws IOException { + this.fs = path.getFileSystem(); + if (fs.isDirectory(path.getPath())) { + fileIt = fs.listFiles(path.getPath(), false); + sequenceReader = getNextSequenceReader(); + } else { + fileIt = null; + sequenceReader = new Reader(fs.getConf(), SequenceFile.Reader.file(path.getPath())); + } + } + + //------------------------ LOGIC --------------------------------- + + /* + * (non-Javadoc) + * + * @see java.util.Iterator#hasNext() + */ + @Override + public boolean hasNext() { + // check and provide next when already returned + if (toBeReturned == null) { + toBeReturned = getNext(); + } + return toBeReturned != null; + } + + /* + * (non-Javadoc) + * + * @see java.util.Iterator#next() + */ + @Override + public Text next() { + if (toBeReturned != null) { + // element fetched while executing hasNext() + Text result = toBeReturned; + toBeReturned = null; + return result; + } else { + Text resultCandidate = getNext(); + if (resultCandidate!=null) { + return resultCandidate; + } else { + throw new NoSuchElementException(); + } + } + } + + /* + * (non-Javadoc) + * + * @see eu.dnetlib.dhp.exp.iterator.ClosableIterator#close() + */ + @Override + public void close() throws IOException { + if (sequenceReader != null) { + sequenceReader.close(); + } + } + + //------------------------ PRIVATE ------------------------------- + + private final Reader getNextSequenceReader() throws IOException { + while (fileIt != null && fileIt.hasNext()) { + LocatedFileStatus currentFileStatus = fileIt.next(); + if (isValidFile(currentFileStatus)) { + return new Reader(this.fs.getConf(), SequenceFile.Reader.file(currentFileStatus.getPath())); + } + } + // fallback + return null; + } + + /** + * Checks whether file is valid candidate. 
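+ * Only regular files whose names match the whitelist pattern are accepted; directories and
+ * underscore-prefixed files (e.g. "_SUCCESS") are rejected.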
+ * + * @param fileStatus + * file status holding file name + * @return true when valid, false otherwise + */ + private final boolean isValidFile(LocatedFileStatus fileStatus) { + if (fileStatus.isFile()) { + return WHITELIST_REGEXP.matcher(fileStatus.getPath().getName()).matches(); + } else { + return false; + } + } + + /** + * @return next data package + */ + private Text getNext() { + try { + if (sequenceReader == null) { + return null; + } + Writable key = (Writable) ReflectionUtils.newInstance(sequenceReader.getKeyClass(), fs.getConf()); + Writable value = (Writable) ReflectionUtils.newInstance(sequenceReader.getValueClass(), fs.getConf()); + if (sequenceReader.next(key, value)) { + return (Text) value; + } else { + sequenceReader.close(); + sequenceReader = getNextSequenceReader(); + if (sequenceReader != null) { + return getNext(); + } + } + // fallback + return null; + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java new file mode 100644 index 0000000000..00a071ac9e --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java @@ -0,0 +1,54 @@ +package eu.dnetlib.dhp.common.java.jsonworkflownodes; + +import java.io.InputStream; +import java.io.OutputStream; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * Utility class responsible for copying resources available on classpath to specified HDFS location. 
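+ * + * A configuration sketch (both values are illustrative): the node is driven by two parameters, e.g.
+ * {@code inputClasspathResource=eu/dnetlib/dhp/avro/fault.avsc} and
+ * {@code outputHdfsFileLocation=${workingDir}/fault.avsc}.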
+ * @author mhorst + * + */ +public class ClassPathResourceToHdfsCopier implements Process { + + private static final String PARAM_INPUT_CLASSPATH_RESOURCE = "inputClasspathResource"; + + private static final String PARAM_OUTPUT_HDFS_FILE_LOCATION = "outputHdfsFileLocation"; + + @Override + public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { + Preconditions.checkNotNull(parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE), PARAM_INPUT_CLASSPATH_RESOURCE + " parameter was not specified!"); + Preconditions.checkNotNull(parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION), PARAM_OUTPUT_HDFS_FILE_LOCATION + " parameter was not specified!"); + + FileSystem fs = FileSystem.get(conf); + + try (InputStream in = Thread.currentThread().getContextClassLoader() + .getResourceAsStream(parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE)); + OutputStream os = fs.create(new Path(parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION)))) { + IOUtils.copyBytes(in, os, 4096, false); + } + } + + @Override + public Map getInputPorts() { + return new HashMap(); + } + + @Override + public Map getOutputPorts() { + return new HashMap(); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java new file mode 100644 index 0000000000..9de33809a0 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java @@ -0,0 +1,66 @@ +package eu.dnetlib.dhp.common.java.jsonworkflownodes; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.avro.Schema; + +import eu.dnetlib.dhp.common.java.jsonworkflownodes.StringPortSpecificationExtractor.PortSpecification; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import eu.dnetlib.dhp.common.utils.AvroUtils; + +/** + * @author Mateusz Kobos + */ +public class PortSpecifications { + private static final String[] propertyRegexps = + new String[]{"[\\w\\.]+", "[\\w\\./_\\-]+"}; + private final Map specs; + + public static class SpecificationValues { + + private final Schema schema; + + private final String jsonFilePath; + + public SpecificationValues(Schema schema, String jsonFilePath) { + this.schema = schema; + this.jsonFilePath = jsonFilePath; + } + + public Schema getSchema() { + return schema; + } + + public String getJsonFilePath() { + return jsonFilePath; + } + + } + + public PortSpecifications(String[] portSpecifications){ + StringPortSpecificationExtractor portSpecExtractor = + new StringPortSpecificationExtractor(propertyRegexps); + specs = new HashMap(); + for(int i = 0; i < portSpecifications.length; i++){ + PortSpecification portSpec = portSpecExtractor.getSpecification(portSpecifications[i]); + Schema schema = AvroUtils.toSchema(portSpec.getProperties()[0]); + String jsonPath = portSpec.getProperties()[1]; + specs.put(portSpec.getName(), new SpecificationValues(schema, jsonPath)); + } + } + + public SpecificationValues get(String portName){ + return specs.get(portName); + } + + public Map getPortTypes(){ + Map ports = new HashMap(); + for(Map.Entry e: specs.entrySet()){ + Schema schema = e.getValue().schema; + ports.put(e.getKey(), new AvroPortType(schema)); + } + return ports; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java new file mode 100644 index 0000000000..0b10b68051 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java @@ -0,0 +1,89 @@ +package eu.dnetlib.dhp.common.java.jsonworkflownodes; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Extracts information about port name and its properties from a string + * of the form "{port_name, property_1, property_2, ...}" + * @author Mateusz Kobos + */ +public class StringPortSpecificationExtractor { + private final String[] propertiesRegexp; + private final String portSpecificationRegexp; + private final Pattern pattern; + + public static class PortSpecification { + + private final String name; + + private final String[] properties; + + public PortSpecification(String name, String[] properties) { + this.name = name; + this.properties = properties; + } + + public String getName() { + return name; + } + + public String[] getProperties() { + return properties; + } + } + + /** + * @param propertiesRegexp regular expressions specifying pattern for + * each of the properties associated with a port. An example of a single + * specification: {@code "[\\w\\.]+"}. + */ + public StringPortSpecificationExtractor(String[] propertiesRegexp){ + this.propertiesRegexp = propertiesRegexp; + this.portSpecificationRegexp = createRegexpString("[\\w\\._]+", propertiesRegexp); + this.pattern = Pattern.compile(this.portSpecificationRegexp); + } + + private static String createRegexpString(String portNameRegexp, String[] propertiesRegexp){ + StringBuilder regexp = new StringBuilder(); + regexp.append("\\s*\\{\\s*"); + regexp.append("("+portNameRegexp+")"); + for(String propertyRegexp: propertiesRegexp){ + regexp.append(",\\s*("+propertyRegexp+")"); + } + regexp.append("\\s*\\}\\s*"); + return regexp.toString(); + } + + private int getPropertiesCount(){ + return propertiesRegexp.length; + } + + public PortSpecification getSpecification(String text){ + Matcher m = pattern.matcher(text); + if(!m.matches()){ + throw new RuntimeException(String.format("Specification of " + + "the port (\"%s\") does not match regexp \"%s\"", + text, portSpecificationRegexp)); + } + final int expectedGroupsCount = getPropertiesCount()+1; + if(m.groupCount() != expectedGroupsCount){ + StringBuilder groups = new StringBuilder(); + for(int i = 0; i < m.groupCount(); i++){ + groups.append("\""+m.group(i)+"\""); + if(i != m.groupCount()-1) { + groups.append(", "); + } + } + throw new RuntimeException(String.format( + "Invalid output port specification \"%s\": got %d groups "+ "instead of %d (namely: %s)", text, m.groupCount(), + expectedGroupsCount, groups.toString())); + } + String[] properties = new String[getPropertiesCount()]; + for(int i = 0; i < getPropertiesCount(); i++){ + properties[i] = m.group(i+2); + } + return new PortSpecification(m.group(1), properties); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java new file mode 100644 index 0000000000..57bda6ed7c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.common.java.porttype; + +/** + * A port type that accepts any type of data + * @author Mateusz Kobos + * + */ +public class AnyPortType implements PortType { + + @Override
public String getName() { + return "Any"; + } + + @Override + public boolean accepts(PortType other) { + return true; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java new file mode 100644 index 0000000000..47a57164e6 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java @@ -0,0 +1,65 @@ +package eu.dnetlib.dhp.common.java.porttype; + +import org.apache.avro.Schema; +import org.apache.commons.lang.NotImplementedException; + +/** + * This port type accepts data stores in a format of Avro + * Object Container Files, i.e. Avro data files. + * This kind of file corresponds to a list of objects, each one being of the + * same type, i.e. each one is defined by the same Avro schema. + * @author Mateusz Kobos + */ +public class AvroPortType implements PortType { + + private final Schema schema; + + + public AvroPortType(Schema schema) { + this.schema = schema; + } + + @Override + public String getName() { + return schema.getFullName(); + } + + @Override + /** Simple check if the port types are exactly the same + * (as defined by the {@code equals} method). + * + * TODO: this should work in a more relaxed way - + * {@code this.accepts(other)} should be true if {@code this} + * describes a subset of structures defined in {@code other}. To be + * more precise: the JSON schema tree tree defined by {@code this} should + * form a sub-tree of the JSON schema tree defined by {@code other}. */ + public boolean accepts(PortType other) { + return this.equals(other); + } + + /** + * Two patterns are equal if their schemas are the same. + */ + @Override + public boolean equals(Object o){ + if(!(o instanceof AvroPortType)){ + return false; + } + AvroPortType other = (AvroPortType) o; + return this.schema.equals(other.schema); + } + + @Override + public int hashCode(){ + throw new NotImplementedException(); + } + + /** + * Returns avro schema. + * @return avro schema + */ + public Schema getSchema() { + return schema; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java new file mode 100644 index 0000000000..5ae2d48c2e --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java @@ -0,0 +1,33 @@ +package eu.dnetlib.dhp.common.java.porttype; + +/** + * Type of the port. This is used to specify what kind of data is + * accepted on a certain input port or produced on a certain output port + * of a workflow node. + * + * @author Mateusz Kobos + * + */ +public interface PortType { + + String getName(); + + /** + * This should be used to check whether data produced by a workflow node + * conforms to the data consumed by other workflow node. + * In a scenario when A produces certain data on a port p and B consumes + * this data on a port q, type(q).accepts(type(p)) has to be true. + * + * @return {@code true} if {@code this} port type is a more general + * version of the {@code other} port type, + * or as an alternative explanation: {@code other} is a subset of + * {@code this}, i.e. {@code other} has at least all the properties present + * in {@code this} (and possibly some others). 
This is analogous to a + * situation in object-oriented programming, where in order for assignment + * operation {@code this = other} to work, the type of {@code this} has to + * accept type of {@code other}, or in other words {@code other} has to + * inherit from {@code this}, or in yet other words: {@code other} has to + * conform to {@code this}. + */ + boolean accepts(PortType other); +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java new file mode 100644 index 0000000000..d5d38fe297 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java @@ -0,0 +1,149 @@ +package eu.dnetlib.dhp.common.lock; + +import java.security.InvalidParameterException; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.Semaphore; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.ZKFailoverController; +import org.apache.log4j.Logger; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher.Event; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.ZooKeeper; + +import com.google.common.base.Preconditions; +import com.google.common.base.Stopwatch; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.porttype.PortType; + +/** + * Zookeeper lock managing process. Blocks until lock is released. + * + * @author mhorst + * + */ +public class LockManagingProcess implements eu.dnetlib.dhp.common.java.Process { + + public static final String DEFAULT_ROOT_NODE = "/cache"; + + public static final String NODE_SEPARATOR = "/"; + + public static final String PARAM_ZK_SESSION_TIMEOUT = "zk_session_timeout"; + + public static final String PARAM_NODE_ID = "node_id"; + + public static final String PARAM_LOCK_MODE = "mode"; + + public static enum LockMode { + obtain, + release + } + + public static final int DEFAULT_SESSION_TIMEOUT = 60000; + + public static final Logger log = Logger.getLogger(LockManagingProcess.class); + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return Collections.emptyMap(); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception { + + Preconditions.checkArgument(parameters.containsKey(PARAM_NODE_ID), "node id not provided!"); + Preconditions.checkArgument(parameters.containsKey(PARAM_LOCK_MODE), "lock mode not provided!"); + + String zkConnectionString = conf.get(ZKFailoverController.ZK_QUORUM_KEY); + Preconditions.checkArgument(StringUtils.isNotBlank(zkConnectionString), + "zookeeper quorum is unknown, invalid '%s' property value: %s", ZKFailoverController.ZK_QUORUM_KEY, zkConnectionString); + + int sessionTimeout = parameters.containsKey(PARAM_ZK_SESSION_TIMEOUT)? 
+ Integer.valueOf(parameters.get(PARAM_ZK_SESSION_TIMEOUT)) : DEFAULT_SESSION_TIMEOUT; + + final ZooKeeper zooKeeper = new ZooKeeper(zkConnectionString, sessionTimeout, (e) -> { + // we are not interested in generic events + }); + +// initializing root node if does not exist + if (zooKeeper.exists(DEFAULT_ROOT_NODE, false) == null) { + log.info("initializing root node: " + DEFAULT_ROOT_NODE); + zooKeeper.create(DEFAULT_ROOT_NODE, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + log.info("root node initialized"); + } + + final String nodePath = generatePath(parameters.get(PARAM_NODE_ID), DEFAULT_ROOT_NODE); + + final Semaphore semaphore = new Semaphore(1); + semaphore.acquire(); + + switch(LockMode.valueOf(parameters.get(PARAM_LOCK_MODE))) { + case obtain: { + obtain(zooKeeper, nodePath, semaphore); + break; + } + case release: { + release(zooKeeper, nodePath); + break; + } + default: { + throw new InvalidParameterException("unsupported lock mode: " + parameters.get(PARAM_LOCK_MODE)); + } + } + } + + // ------------------------- PRIVATE -------------------------- + + private void obtain(final ZooKeeper zooKeeper, final String nodePath, final Semaphore semaphore) throws KeeperException, InterruptedException { + log.info("trying to obtain lock: " + nodePath); + if (zooKeeper.exists(nodePath, (event) -> { + if (Event.EventType.NodeDeleted == event.getType()) { + try { + log.info(nodePath + " lock release detected"); + log.info("creating new lock instance: " + nodePath + "..."); + zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + log.info("lock" + nodePath + " created"); + semaphore.release(); + } catch (KeeperException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }) == null) { + log.info("lock not found, creating new lock instance: " + nodePath); + zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + log.info("lock" + nodePath + " created"); + semaphore.release(); + } else { + // waiting until node is removed by other lock manager + log.info("waiting until lock is released"); + Stopwatch timer = new Stopwatch().start(); + semaphore.acquire(); + log.info("lock released, waited for " + timer.elapsedMillis() + " ms"); + semaphore.release(); + } + } + + private void release(final ZooKeeper zooKeeper, final String nodePath) throws InterruptedException, KeeperException { + log.info("removing lock" + nodePath + "..."); + zooKeeper.delete(nodePath, -1); + log.info("lock" + nodePath + " removed"); + } + + private static final String generatePath(String nodeId, String rootNode) { + return rootNode + NODE_SEPARATOR + nodeId.replace('/', '_'); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java new file mode 100644 index 0000000000..71599277a2 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java @@ -0,0 +1,24 @@ +package eu.dnetlib.dhp.common.oozie; + +import org.apache.oozie.client.OozieClient; + +/** + * Factory of {@link OozieClient} + * + * @author madryk + */ +public class OozieClientFactory { + + + //------------------------ LOGIC -------------------------- + + /** + * Returns {@link OozieClient} object used for communication with oozie + */ + public OozieClient createOozieClient(String oozieUrl) { + + OozieClient oozieClient = new OozieClient(oozieUrl); + 
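+ // no extra configuration is applied here; callers typically prepare job
+ // properties themselves, e.g. starting from OozieClient#createConfiguration()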
+ return oozieClient; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java new file mode 100644 index 0000000000..6b7fe29758 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java @@ -0,0 +1,76 @@ +package eu.dnetlib.dhp.common.oozie.property; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Collections; +import java.util.Map; +import java.util.Properties; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import org.apache.hadoop.conf.Configuration; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; + +/** + * This process is a solution for setting dynamic properties in oozie workflow definition. + * + * Expects three parameters to be provided: the first 'condition' parameter is boolean value + * based on which either first 'inCaseOfTrue' or second 'elseCase' parameter value is set as + * the 'result' property. + * + * This can be understood as the: + * + * condition ? inCaseOfTrue : elseCase + * + * java syntax equivalent. + * + * @author mhorst + * + */ +public class ConditionalPropertySetter implements Process { + + public static final String PARAM_CONDITION = "condition"; + public static final String PARAM_INCASEOFTRUE = "inCaseOfTrue"; + public static final String PARAM_ELSECASE = "elseCase"; + + public static final String OUTPUT_PROPERTY_RESULT = "result"; + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return Collections.emptyMap(); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception { + + String condition = parameters.get(PARAM_CONDITION); + if (condition == null) { + throw new RuntimeException("unable to make decision: " + + PARAM_CONDITION + " parameter was not set!"); + } + + Properties props = new Properties(); + props.setProperty(OUTPUT_PROPERTY_RESULT, + Boolean.parseBoolean(condition)? 
+                        parameters.get(PARAM_INCASEOFTRUE) :
+                        parameters.get(PARAM_ELSECASE));
+        OutputStream os = new FileOutputStream(
+                new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)));
+        try {
+            props.store(os, "");
+        } finally {
+            os.close();
+        }
+
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java
new file mode 100644
index 0000000000..78da47f12d
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java
@@ -0,0 +1,12 @@
+package eu.dnetlib.dhp.common.protobuf;
+
+import com.google.protobuf.Message;
+import org.apache.avro.generic.IndexedRecord;
+
+/**
+ * Converter of avro records into protocol buffer messages.
+ * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl)
+ */
+public interface AvroToProtoBufConverter<IN extends IndexedRecord, OUT extends Message> {
+    String convertIntoKey(IN datum);
+    OUT convertIntoValue(IN datum);
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java
new file mode 100644
index 0000000000..00a318aff8
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java
@@ -0,0 +1,62 @@
+package eu.dnetlib.dhp.common.protobuf;
+
+import com.google.protobuf.Message;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+
+/**
+ * Mapper passing each input avro record to the configured converter
+ * and emitting the resulting key/value pair.
+ * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl)
+ */
+public class AvroToProtoBufOneToOneMapper<IN extends IndexedRecord, OUT extends Message>
+        extends Mapper<AvroKey<IN>, NullWritable, Text, BytesWritable> {
+    private static final String CONVERTER_CLASS_PROPERTY = "converter_class";
+    private static final Logger log = Logger.getLogger(AvroToProtoBufOneToOneMapper.class);
+
+    private final Text keyWritable = new Text();
+    private final BytesWritable valueWritable = new BytesWritable();
+    private AvroToProtoBufConverter<IN, OUT> converter;
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void setup(Context context) throws IOException, InterruptedException {
+        Class<?> converterClass = context.getConfiguration().getClass(CONVERTER_CLASS_PROPERTY, null);
+
+        if (converterClass == null) {
+            throw new IOException("Please specify " + CONVERTER_CLASS_PROPERTY);
+        }
+
+        try {
+            converter = (AvroToProtoBufConverter<IN, OUT>) converterClass.newInstance();
+        } catch (ClassCastException e) {
+            throw new IOException(
+                    "Class specified in " + CONVERTER_CLASS_PROPERTY + " doesn't implement AvroToProtoBufConverter", e);
+        } catch (Exception e) {
+            throw new IOException(
+                    "Could not instantiate specified AvroToProtoBufConverter class, " + converterClass, e);
+        }
+    }
+
+    @Override
+    public void map(AvroKey<IN> avro, NullWritable ignore, Context context)
+            throws IOException, InterruptedException {
+        String key = null;
+        try {
+            key = converter.convertIntoKey(avro.datum());
+            keyWritable.set(key);
+
+            byte[] value = converter.convertIntoValue(avro.datum()).toByteArray();
+            valueWritable.set(value, 0, value.length);
+
+            context.write(keyWritable, valueWritable);
+        } catch (Exception e) {
+            log.error("Error" + (key != null ?
" while processing " + key : ""), e); + } + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java new file mode 100644 index 0000000000..3c301fa8d3 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java @@ -0,0 +1,32 @@ +package eu.dnetlib.dhp.common.report; + +import eu.dnetlib.dhp.common.schemas.ReportEntry; +import eu.dnetlib.dhp.common.schemas.ReportEntryType; + +/** + * Factory of {@link ReportEntry} objects. + * + * @author madryk + */ +public final class ReportEntryFactory { + + // ----------------------- CONSTRUCTORS ----------------------------- + + private ReportEntryFactory() {} + + // ----------------------- LOGIC ------------------------------------ + + /** + * Creates {@link ReportEntry} with {@link ReportEntryType#COUNTER} type + */ + public static ReportEntry createCounterReportEntry(String key, long count) { + return new ReportEntry(key, ReportEntryType.COUNTER, String.valueOf(count)); + } + + /** + * Creates {@link ReportEntry} with {@link ReportEntryType#DURATION} type + */ + public static ReportEntry createDurationReportEntry(String key, long duration) { + return new ReportEntry(key, ReportEntryType.DURATION, String.valueOf(duration)); + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java new file mode 100644 index 0000000000..1ed3dfb081 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java @@ -0,0 +1,110 @@ +package eu.dnetlib.dhp.common.report; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.google.common.collect.Lists; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import eu.dnetlib.dhp.common.schemas.ReportEntry; + +/** + * Java workflow node process for building report.
+ * It writes report properties into an avro datastore of {@link ReportEntry}s
+ * with the location specified in the output port.
+ * A report property name must start with the {@code report.} prefix to
+ * be included in the output datastore.
+ *
+ * Usage example:
+ * <pre>
+ * {@code
+ * <action>
+ *     <java>
+ *         <main-class>eu.dnetlib.dhp.common.java.ProcessWrapper</main-class>
+ *         <arg>eu.dnetlib.dhp.common.report.ReportGenerator</arg>
+ *         <arg>-Preport.someProperty=someValue</arg>
+ *         <arg>-Oreport=/report/path</arg>
+ *     </java>
+ *     ...
+ * </action>
+ * }
+ * </pre>
+ * The above example will produce an avro datastore in {@code /report/path}
+ * with a single {@link ReportEntry},
+ * where {@link ReportEntry#getKey()} will be equal to {@code someProperty} and
+ * {@link ReportEntry#getValue()} will be equal to {@code someValue}
+ * (notice the {@code report.} prefix stripped from the entry key).
+ *
+ *
+ * @author madryk
+ *
+ */
+public class ReportGenerator implements Process {
+
+    private static final String REPORT_PORT_OUT_NAME = "report";
+
+    private static final String REPORT_PROPERTY_PREFIX = "report.";
+
+
+    //------------------------ LOGIC --------------------------
+
+    @Override
+    public Map<String, PortType> getInputPorts() {
+        return Collections.emptyMap();
+    }
+
+    @Override
+    public Map<String, PortType> getOutputPorts() {
+        return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$));
+    }
+
+    @Override
+    public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {
+
+        Map<String, String> entriesToReport = collectEntriesToReport(parameters);
+
+        List<ReportEntry> avroReport = convertToAvroReport(entriesToReport);
+
+        FileSystem fs = FileSystem.get(conf);
+
+        Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME);
+
+        DataStore.create(avroReport, new FileSystemPath(fs, reportPath));
+
+    }
+
+
+    //------------------------ PRIVATE --------------------------
+
+    private Map<String, String> collectEntriesToReport(Map<String, String> parameters) {
+
+        return parameters.entrySet().stream()
+                .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX))
+                .map(x -> Pair.of(x.getKey().substring(REPORT_PROPERTY_PREFIX.length()), x.getValue()))
+                .collect(Collectors.toMap(e -> e.getLeft(), e -> e.getRight()));
+
+    }
+
+    private List<ReportEntry> convertToAvroReport(Map<String, String> entriesToReport) {
+
+        List<ReportEntry> avroReport = Lists.newArrayList();
+        entriesToReport.forEach((key, value) -> avroReport.add(ReportEntryFactory.createCounterReportEntry(key, Long.valueOf(value))));
+
+        return avroReport;
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java
new file mode 100644
index 0000000000..33b7f788c5
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java
@@ -0,0 +1,74 @@
+package eu.dnetlib.dhp.common.spark.pipe;
+
+import java.io.Serializable;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.mapred.AvroKey;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.spark.SparkFiles;
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+
+import eu.dnetlib.dhp.common.utils.AvroGsonFactory;
+import scala.Tuple2;
+
+
+/**
+ * Executor of mapreduce scripts using spark pipes.
+ * It imitates hadoop streaming behavior.
+ *
+ * @author madryk
+ *
+ */
+public class SparkPipeExecutor implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+
+    //------------------------ LOGIC --------------------------
+
+    /**
+     * Imitates the map part of a hadoop streaming job.
+     * It executes the provided script for every key in the inputRecords rdd.
+     *
+     * It is assumed that the provided script reads records from standard input
+     * (one line per record) and writes the mapped record to standard output
+     * (also one line per record).
+     * A mapped record can be a key/value pair. In that case the script should
+     * return the key and the value split by a tab (\t) character in a single line.
+     */
+    public JavaPairRDD<String, String> doMap(JavaPairRDD<AvroKey<GenericRecord>, NullWritable> inputRecords, String scriptName, String args) {
+
+        JavaRDD<String> mappedRecords = inputRecords.keys().pipe("python " + SparkFiles.get(scriptName) + " " + args);
+
+        JavaPairRDD<String, String> outputRecords = mappedRecords
+                .mapToPair(line -> {
+                    String[] splittedPair = line.split("\t");
+                    return new Tuple2<String, String>(splittedPair[0], (splittedPair.length == 1) ? null : splittedPair[1]);
+                });
+
+        return outputRecords;
+    }
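+
+    /*
+     * Usage sketch (illustrative only; assumes the python scripts were shipped
+     * with SparkContext#addFile and that MyRecord is a hypothetical
+     * avro-generated class):
+     *
+     * SparkPipeExecutor executor = new SparkPipeExecutor();
+     * JavaPairRDD<String, String> mapped = executor.doMap(avroInput, "mapper.py", "");
+     * JavaPairRDD<AvroKey<MyRecord>, NullWritable> reduced =
+     *         executor.doReduce(mapped, "reducer.py", "", MyRecord.class);
+     */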
+
+    /**
+     * Imitates the reduce part of a hadoop streaming job.
+     *
+     * It is assumed that the provided script reads records from standard input
+     * (one line per record) and groups records with the same key into a single
+     * record (reduce).
+     * The method assures that all input records with the same key are
+     * transferred in adjacent lines.
+     * Reduced records should be written by the script to standard output
+     * (one line per record).
+     * Reduced records must be json strings of the class provided as an argument.
+     */
+    public <T> JavaPairRDD<AvroKey<T>, NullWritable> doReduce(JavaPairRDD<String, String> inputRecords, String scriptName, String args, Class<T> outputClass) {
+
+        JavaRDD<String> reducedRecords = inputRecords.sortByKey()
+                .map(record -> record._1 + ((record._2 == null) ? "" : ("\t" + record._2)))
+                .pipe("python " + SparkFiles.get(scriptName) + " " + args);
+
+        JavaPairRDD<AvroKey<T>, NullWritable> outputRecords = reducedRecords
+                .map(recordString -> AvroGsonFactory.create().fromJson(recordString, outputClass))
+                .mapToPair(record -> new Tuple2<AvroKey<T>, NullWritable>(new AvroKey<>(record), NullWritable.get()));
+
+        return outputRecords;
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java
new file mode 100644
index 0000000000..5889cf57a3
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java
@@ -0,0 +1,31 @@
+package eu.dnetlib.dhp.common.string;
+
+/**
+ * Operations on {@link CharSequence}.
+ *
+ * @author Łukasz Dumiszewski
+ */
+
+public final class CharSequenceUtils {
+
+
+    //------------------------ CONSTRUCTORS --------------------------
+
+    private CharSequenceUtils() {
+        throw new IllegalStateException("may not be initialized");
+    }
+
+
+    //------------------------ LOGIC --------------------------
+
+    /**
+     * Converts the given {@link CharSequence} value to {@link String} by using {@link CharSequence#toString()}.
+     * Returns an empty string if the value is null.
+     */
+    public static String toStringWithNullToEmpty(CharSequence value) {
+
+        return value == null? "": value.toString();
+
+    }
+
+}
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java
new file mode 100644
index 0000000000..b69ed34199
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java
@@ -0,0 +1,113 @@
+/*
+ * This file is part of CoAnSys project.
+ * Copyright (c) 2012-2015 ICM-UW
+ *
+ * CoAnSys is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+
+ * CoAnSys is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with CoAnSys. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package eu.dnetlib.dhp.common.string;
+
+import java.text.Normalizer;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Mapping to the basic Latin alphabet (a-z, A-Z). In most cases, a character is
+ * mapped to the closest visual form, rather than functional one, e.g.: "ö" is
+ * mapped to "o" rather than "oe", and "đ" is mapped to "d" rather than "dj" or
+ * "gj". Notable exceptions include: "ĸ" mapped to "q", "ß" mapped to "ss", and
+ * "Þ", "þ" mapped to "Y", "y".
+ *
+ * <p>Each character is processed as follows:</p>
+ * <ol>
+ * <li>the character is compatibility decomposed,</li>
+ * <li>all the combining marks are removed,</li>
+ * <li>the character is compatibility composed,</li>
+ * <li>additional "manual" substitutions are applied.</li>
+ * </ol>
+ *
+ * <p>All the characters from the "Latin-1 Supplement" and "Latin Extended-A"
+ * Unicode blocks are mapped to the "Basic Latin" block. Characters from other
+ * alphabets are generally left intact, although the decomposable ones may be
+ * affected by the procedure.</p>
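+ *
+ * <p>For example (illustrative):</p>
+ * <pre>
+ * {@code
+ * DiacriticsRemover.removeDiacritics("żółć straße"); // yields "zolc strasse"
+ * }
+ * </pre>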
+ * + * @author Lukasz Bolikowski (bolo@icm.edu.pl) + * + * @author Łukasz Dumiszewski /just copied from coansys-commons/ + * + */ +public final class DiacriticsRemover { + + private static final Character[] from = { + 'Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', + 'ħ', 'ı', 'ĸ', 'Ł', 'ł', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ'}; + private static final String[] to = { + "AE", "D", "O", "Y", "ss", "ae", "d", "o", "y", "D", "d", "H", + "h", "i", "q", "L", "l", "N", "n", "OE", "oe", "T", "t"}; + + private static Map lookup = buildLookup(); + + + //------------------------ CONSTRUCTORS ------------------- + + + private DiacriticsRemover() {} + + + //------------------------ LOGIC -------------------------- + + + /** + * Removes diacritics from a text. + * + * @param text Text to process. + * @return Text without diacritics. + */ + public static String removeDiacritics(String text) { + if (text == null) { + return null; + } + + String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD); + + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < tmp.length(); i++) { + Character ch = tmp.charAt(i); + if (Character.getType(ch) == Character.NON_SPACING_MARK) { + continue; + } + + if (lookup.containsKey(ch)) { + builder.append(lookup.get(ch)); + } else { + builder.append(ch); + } + } + + return builder.toString(); + } + + + //------------------------ PRIVATE -------------------------- + + private static Map buildLookup() { + if (from.length != to.length) { + throw new IllegalStateException(); + } + + Map _lookup = new HashMap(); + for (int i = 0; i < from.length; i++) { + _lookup.put(from[i], to[i]); + } + + return _lookup; + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java new file mode 100644 index 0000000000..bae64ae38b --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java @@ -0,0 +1,130 @@ +/* + * This file is part of CoAnSys project. + * Copyright (c) 2012-2015 ICM-UW + * + * CoAnSys is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + + * CoAnSys is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with CoAnSys. If not, see . + */ +package eu.dnetlib.dhp.common.string; + +import java.io.Serializable; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; + +import com.google.common.collect.ImmutableList; + +/** + * An implementation of {@link StringNormalizer} that normalizes strings for non-strict comparisons + * in which one does not care about characters other than letters and digits or about differently written diacritics. 
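+ *
+ * <p>Usage sketch (illustrative):</p>
+ * <pre>
+ * {@code
+ * StringNormalizer normalizer = new LenientComparisonStringNormalizer(ImmutableList.of('-'));
+ * normalizer.normalize("Čapek - RUR"); // yields "capek - rur"
+ * }
+ * </pre>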
+ *
+ * @author Łukasz Dumiszewski
+ *
+ */
+public final class LenientComparisonStringNormalizer implements StringNormalizer, Serializable {
+
+
+    private static final long serialVersionUID = 1L;
+
+
+    private List<Character> whitelistCharacters;
+
+
+    //------------------------ CONSTRUCTORS --------------------------
+
+    public LenientComparisonStringNormalizer() {
+        this(ImmutableList.of());
+    }
+
+    /**
+     * @param whitelistCharacters - non-alphanumeric characters that will not be removed
+     * during normalization
+     */
+    public LenientComparisonStringNormalizer(List<Character> whitelistCharacters) {
+        this.whitelistCharacters = whitelistCharacters;
+    }
+
+
+    //------------------------ LOGIC --------------------------
+
+
+
+    /**
+     * Normalizes the given value.
+ * The normalized strings are better suited for non-strict comparisons, in which one does NOT care about characters that are + * neither letters nor digits; about accidental spaces or different diacritics etc.
+ * This method:
+ * <ul>
+ * <li>Replaces all characters that are not letters or digits with spaces (except those on the whitelist characters list)</li>
+ * <li>Replaces white spaces with spaces</li>
+ * <li>Trims</li>
+ * <li>Compacts multi-space gaps to one-space gaps</li>
+ * <li>Removes diacritics</li>
+ * <li>Changes characters to lower case</li>
+ * </ul>
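+ * <p>For example (illustrative): {@code normalize("  Żółć,  ABC--123! ")}
+ * yields {@code "zolc abc 123"}.</p>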
+ * Returns "" if the passed value is null or blank + * + * @param value the string to normalize + * @see DiacriticsRemover#removeDiacritics(String, boolean) + * + * + */ + public String normalize(String value) { + + if (StringUtils.isBlank(value)) { + + return ""; + + } + + + String result = value; + + result = DiacriticsRemover.removeDiacritics(result); + + result = removeNonLetterDigitCharacters(result); + + result = result.toLowerCase(); + + result = result.trim().replaceAll(" +", " "); + + return result; + } + + + + + //------------------------ PRIVATE -------------------------- + + + private String removeNonLetterDigitCharacters(final String value) { + + StringBuilder sb = new StringBuilder(); + + for (int i = 0; i < value.length(); ++i) { + + char c = value.charAt(i); + + if (Character.isLetterOrDigit(c) || whitelistCharacters.contains(c)) { + sb.append(c); + } else { + sb.append(' '); + } + } + + return sb.toString(); + } + + + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java new file mode 100644 index 0000000000..6e28422bc0 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java @@ -0,0 +1,16 @@ +package eu.dnetlib.dhp.common.string; + +/** + * String normalizer. + * + * @author Łukasz Dumiszewski + * + */ +public interface StringNormalizer { + + /** + * Normalizes the given string value. + */ + String normalize(String value); + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java new file mode 100644 index 0000000000..7fcc0506a0 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.common.utils; + +import java.lang.reflect.Type; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonParseException; + +/** + * Factory for gson object that supports serializing avro generated classes + * + * @author madryk + * + */ +public final class AvroGsonFactory { + + //------------------------ CONSTRUCTORS ------------------- + + + private AvroGsonFactory() {} + + + //------------------------ LOGIC -------------------------- + + public static Gson create() { + GsonBuilder builder = new GsonBuilder(); + + builder.registerTypeAdapter(CharSequence.class, new CharSequenceDeserializer()); + + return builder.create(); + } + + public static class CharSequenceDeserializer implements JsonDeserializer { + + @Override + public CharSequence deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) + throws JsonParseException { + return json.getAsString(); + } + + } +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java new file mode 100644 index 0000000000..44dd218b5e --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java @@ -0,0 +1,77 @@ +package eu.dnetlib.dhp.common.utils; + +import java.lang.reflect.Field; + +import org.apache.avro.Schema; + +/** + * + * @author Mateusz Kobos + * + */ +public final class AvroUtils { + + public final static String primitiveTypePrefix = "org.apache.avro.Schema.Type."; + + + //------------------------ 
CONSTRUCTORS ------------------- + + + private AvroUtils() {} + + + //------------------------ LOGIC -------------------------- + + + /** + * For a given name of a class generated from Avro schema return + * a JSON schema. + * + * Apart from a name of a class you can also give a name of one of enums + * defined in {@link org.apache.avro.Schema.Type}; in such case an + * appropriate primitive type will be returned. + * + * @param typeName fully qualified name of a class generated from Avro schema, + * e.g. {@code eu.dnetlib.dhp.common.avro.Person}, + * or a fully qualified name of enum defined by + * {@link org.apache.avro.Schema.Type}, + * e.g. {@link org.apache.avro.Schema.Type.STRING}. + * @return JSON string + */ + public static Schema toSchema(String typeName) { + Schema schema = null; + if(typeName.startsWith(primitiveTypePrefix)){ + String shortName = typeName.substring( + primitiveTypePrefix.length(), typeName.length()); + schema = getPrimitiveTypeSchema(shortName); + } else { + schema = getAvroClassSchema(typeName); + } + return schema; + } + + private static Schema getPrimitiveTypeSchema(String shortName){ + Schema.Type type = Schema.Type.valueOf(shortName); + return Schema.create(type); + } + + private static Schema getAvroClassSchema(String className){ + try { + Class avroClass = Class.forName(className); + Field f = avroClass.getDeclaredField("SCHEMA$"); + return (Schema) f.get(null); + } catch (ClassNotFoundException e) { + throw new RuntimeException( + "Class \""+className+"\" does not exist", e); + } catch (SecurityException e) { + throw new RuntimeException(e); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } catch (IllegalArgumentException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java new file mode 100644 index 0000000000..152271ab7c --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.common.utils; + +/** + * Byte array utility class. + * @author mhorst + * + */ +public final class ByteArrayUtils { + + //------------------------ CONSTRUCTORS ------------------- + + private ByteArrayUtils() {} + + //------------------------ LOGIC -------------------------- + + /** + * Does this byte array begin with match array content? + * @param source Byte array to examine + * @param match Byte array to locate in source + * @return true If the starting bytes are equal + */ + public static boolean startsWith(byte[] source, byte[] match) { + return startsWith(source, 0, match); + } + + /** + * Does this byte array begin with match array content? 
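+ * <p>For example (illustrative):
+ * {@code startsWith(new byte[] {1, 2, 3}, 1, new byte[] {2, 3})} returns {@code true}.</p>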
+ * @param source Byte array to examine + * @param offset An offset into the source array + * @param match Byte array to locate in source + * @return true If the starting bytes are equal + */ + public static boolean startsWith(byte[] source, int offset, byte[] match) { + if (match.length > (source.length - offset)) { + return false; + } + for (int i = 0; i < match.length; i++) { + if (source[offset + i] != match[i]) { + return false; + } + } + return true; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java new file mode 100644 index 0000000000..1e6e041498 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java @@ -0,0 +1,89 @@ +package eu.dnetlib.dhp.common.utils; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.security.InvalidParameterException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Ports; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.io.CloseableIterator; +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AnyPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; + +/** + * Simple process verifying whether given datastore is empty. + * @author mhorst + * + */ +public class EmptyDatastoreVerifierProcess implements Process { + + public static final String INPUT_PORT_NAME = "input"; + + public static final String DEFAULT_ENCODING = "UTF-8"; + + public static final String OUTPUT_PROPERTY_IS_EMPTY = "isEmpty"; + + /** + * Ports handled by this module. + */ + private final Ports ports; + + + // ------------------------ CONSTRUCTORS -------------------------- + + public EmptyDatastoreVerifierProcess() { +// preparing ports + Map input = new HashMap(); + input.put(INPUT_PORT_NAME, new AnyPortType()); + Map output = Collections.emptyMap(); + ports = new Ports(input, output); + } + + @Override + public Map getInputPorts() { + return ports.getInput(); + } + + @Override + public Map getOutputPorts() { + return ports.getOutput(); + } + + @Override + public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { + if (!portBindings.getInput().containsKey(INPUT_PORT_NAME)) { + throw new InvalidParameterException("missing input port!"); + } + + try (CloseableIterator closeableIt = getIterator(conf, portBindings.getInput().get(INPUT_PORT_NAME))) { + File file = new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); + Properties props = new Properties(); + props.setProperty(OUTPUT_PROPERTY_IS_EMPTY, Boolean.toString(!closeableIt.hasNext())); + try (OutputStream os = new FileOutputStream(file)) { + props.store(os, ""); + } + } + } + + /** + * Returns iterator over datastore. 
+ */ + protected CloseableIterator getIterator(Configuration conf, Path path) throws IOException { + return DataStore.getReader(new FileSystemPath(FileSystem.get(conf), path)); + } + +} diff --git a/dhp-schemas/README.md b/dhp-schemas/README.md new file mode 100644 index 0000000000..473ad4cf19 --- /dev/null +++ b/dhp-schemas/README.md @@ -0,0 +1,3 @@ +Description of the project +-------------------------- +This project defines **serialization schemas** of Avro data store files that are used to pass data between workflow nodes in the system. diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml new file mode 100644 index 0000000000..2c6e18f271 --- /dev/null +++ b/dhp-schemas/pom.xml @@ -0,0 +1,62 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + + dhp-schemas + jar + + + + org.apache.avro + avro + + + + + + + + org.apache.avro + avro-maven-plugin + + + generate-sources + + schema + idl-protocol + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + ${project.build.directory}/generated-sources/avro/ + + + + + + + + + diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl new file mode 100644 index 0000000000..3bce821a4b --- /dev/null +++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl @@ -0,0 +1,29 @@ +@namespace("eu.dnetlib.dhp.audit.schemas") +protocol DHP { + + record Cause { +// generic cause code, root exception class name when derived from exception + string code; +// cause message + union { null , string } message = null; + } + + record Fault { +// input object identifier + string inputObjectId; +// fault creation timestamp + long timestamp; +// generic fault code, root exception class name when derived from exception + string code; +// fault message + union { null , string } message = null; +// stack trace + union { null , string } stackTrace = null; +// fault causes, array is indexed with cause depth + union { null , array } causes = null; +// Other supplementary data related to specific type of fault. +// See parameters description in oozie workflow.xml documentation of modules +// that use this structure for information what exactly can be stored as supplementary data. 
+ union { null , map } supplementaryData = null; + } +} diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl new file mode 100644 index 0000000000..99406b4f06 --- /dev/null +++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl @@ -0,0 +1,16 @@ +@namespace("eu.dnetlib.dhp.common.schemas") +protocol DHP{ + + enum ReportEntryType { + COUNTER, DURATION + } + + + record ReportEntry { + + string key; + ReportEntryType type; + string value; + + } +} diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl new file mode 100644 index 0000000000..9ad5435fac --- /dev/null +++ b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl @@ -0,0 +1,21 @@ +@namespace("eu.dnetlib.dhp.importer.schemas") +protocol DHP { + + enum RecordFormat { + XML, JSON + } + + record ImportedRecord { + + // record identifier + string id; + + RecordFormat format; + + // format name (OAF, OAI_DC, Datacite, etc) for which there is a parser implementation + string formatName; + + // record body + string body; + } +} diff --git a/dhp-wf/dhp-wf-import/pom.xml b/dhp-wf/dhp-wf-import/pom.xml new file mode 100644 index 0000000000..6bf4ba8250 --- /dev/null +++ b/dhp-wf/dhp-wf-import/pom.xml @@ -0,0 +1,105 @@ + + + + eu.dnetlib.dhp + dhp-wf + 1.0.0-SNAPSHOT + + 4.0.0 + + dhp-wf-import + + + + + ${project.groupId} + dhp-common + ${project.version} + + + + ${project.groupId} + dhp-common + ${project.version} + test-jar + test + + + + ${project.groupId} + dhp-schemas + ${project.version} + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + org.apache.hadoop + hadoop-common + + + + com.googlecode.json-simple + json-simple + + + commons-cli + commons-cli + + + eu.dnetlib + dnet-objectstore-rmi + + + eu.dnetlib + cnr-rmi-api + + + eu.dnetlib + cnr-resultset-client + + + eu.dnetlib + dnet-openaireplus-mapping-utils + + + + org.springframework + spring-context + + + org.apache.cxf + cxf-rt-frontend-jaxws + + + com.google.code.gson + gson + + + + org.apache.spark + spark-core_2.10 + + + org.apache.spark + spark-sql_2.10 + + + com.databricks + spark-avro_2.10 + + + org.mongodb.spark + mongo-spark-connector_2.10 + + + + + + + diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java new file mode 100644 index 0000000000..214d6691dd --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java @@ -0,0 +1,29 @@ +package eu.dnetlib.dhp.wf.importer; + +import java.io.IOException; + +import org.apache.avro.file.DataFileWriter; + +/** + * {@link DataFileWriter} based record receiver. + * @author mhorst + * + */ +public class DataFileRecordReceiver implements RecordReceiver { + + private final DataFileWriter writer; + + /** + * Default constructor. 
+ * @param writer + */ + public DataFileRecordReceiver(DataFileWriter writer) { + this.writer = writer; + } + + @Override + public void receive(T object) throws IOException { + this.writer.append(object); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java new file mode 100644 index 0000000000..955f18065c --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java @@ -0,0 +1,50 @@ +package eu.dnetlib.dhp.wf.importer; + +import java.io.IOException; + +import org.apache.avro.file.DataFileWriter; + +/** + * {@link DataFileWriter} based record receiver with counter of + * received records. + * + * @author madryk + */ +public class DataFileRecordReceiverWithCounter extends DataFileRecordReceiver { + + private long receivedCount = 0L; + + + //------------------------ CONSTRUCTORS -------------------------- + + /** + * Default constructor + * + * @param writer - writer of the received records + */ + public DataFileRecordReceiverWithCounter(DataFileWriter writer) { + super(writer); + } + + + //------------------------ GETTERS -------------------------- + + /** + * Returns number of received records + */ + public long getReceivedCount() { + return receivedCount; + } + + + //------------------------ LOGIC -------------------------- + + /** + * Receives passed record and increments the counter. + */ + @Override + public void receive(T record) throws IOException { + super.receive(record); + ++receivedCount; + } +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java new file mode 100644 index 0000000000..40f673ee08 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.wf.importer; + +/** + * Import realated workflow parameters. 
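+ * <p>Typical lookup sketch (illustrative; {@code parameters} as passed to a
+ * workflow node {@code Process}):</p>
+ * <pre>
+ * {@code
+ * String mdStoreIds = parameters.get(ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_IDS_CSV);
+ * }
+ * </pre>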
+ * @author mhorst + * + */ +public final class ImportWorkflowRuntimeParameters { + + // parameter names + + public static final String IMPORT_INFERENCE_PROVENANCE_BLACKLIST = "import.inference.provenance.blacklist"; + public static final String IMPORT_SKIP_DELETED_BY_INFERENCE = "import.skip.deleted.by.inference"; + public static final String IMPORT_TRUST_LEVEL_THRESHOLD = "import.trust.level.threshold"; + public static final String IMPORT_APPROVED_DATASOURCES_CSV = "import.approved.datasources.csv"; + public static final String IMPORT_APPROVED_COLUMNFAMILIES_CSV = "import.approved.columnfamilies.csv"; + public static final String IMPORT_MERGE_BODY_WITH_UPDATES = "import.merge.body.with.updates"; + public static final String IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV = "import.content.approved.objectstores.csv"; + public static final String IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV = "import.content.blacklisted.objectstores.csv"; + + public static final String IMPORT_CONTENT_OBJECT_STORE_LOC = "import.content.object.store.location"; + public static final String IMPORT_CONTENT_OBJECT_STORE_IDS_CSV = "import.content.object.store.ids.csv"; + public static final String IMPORT_CONTENT_MAX_FILE_SIZE_MB = "import.content.max.file.size.mb"; + public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout"; + public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout"; + + public static final String IMPORT_MDSTORE_IDS_CSV = "import.mdstore.ids.csv"; + public static final String IMPORT_MDSTORE_SERVICE_LOCATION = "import.mdstore.service.location"; + public static final String IMPORT_MDSTORE_RECORD_MAXLENGTH = "import.mdstore.record.maxlength"; + + public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location"; + public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code"; + public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = "import.vocabulary.output.filename"; + + public static final String IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT = "import.resultset.client.read.timeout"; + public static final String IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT = "import.resultset.client.connection.timeout"; + public static final String IMPORT_RESULT_SET_PAGESIZE = "import.resultset.pagesize"; + + + public static final String HBASE_ENCODING = "hbase.table.encoding"; + + public static final String IMPORT_FACADE_FACTORY_CLASS = "import.facade.factory.classname"; + + // default values + + public static final String RESULTSET_READ_TIMEOUT_DEFAULT_VALUE = "60000"; + public static final String RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; + public static final String RESULTSET_PAGESIZE_DEFAULT_VALUE = "100"; + + private ImportWorkflowRuntimeParameters() {} + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java new file mode 100644 index 0000000000..c0a5e89507 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java @@ -0,0 +1,14 @@ +package eu.dnetlib.dhp.wf.importer; + +import java.io.IOException; + +/** + * Record receiver interface. 
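+ * <p>A trivial implementation sketch (assuming records of type String):</p>
+ * <pre>
+ * {@code
+ * RecordReceiver<String> printingReceiver = record -> System.out.println(record);
+ * }
+ * </pre>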
+ * @author mhorst + * + * @param + */ +public interface RecordReceiver { + + void receive(T object) throws IOException; +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java new file mode 100644 index 0000000000..0a3cd6fb4c --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java @@ -0,0 +1,104 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import java.util.Map; + +import javax.xml.ws.BindingProvider; +import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder; + +import org.apache.log4j.Logger; + +import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; + +/** + * Abstract class utilized by all WebService facades. + * @author mhorst + * + */ +public abstract class AbstractResultSetAwareWebServiceFacade { + + private final Logger log = Logger.getLogger(this.getClass()); + + /** + * Web service. + */ + private final T service; + + /** + * ResultSet read timeout. + */ + private final long resultSetReadTimeout; + + /** + * ResultSet connection timeout. + */ + private final long resultSetConnectionTimeout; + + /** + * ResultSet page size. + */ + private final int resultSetPageSize; + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * Instantiates underlying service. + * @param clazz webservice class + * @param serviceLocation webservice location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout resultset read timeout + * @param resultSetConnectionTimeout resultset connection timeout + * @param resultSetPageSize resultset page size + */ + protected AbstractResultSetAwareWebServiceFacade(Class clazz, String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); + eprBuilder.address(serviceLocation); + eprBuilder.build(); + this.service = new JaxwsServiceResolverImpl().getService(clazz, eprBuilder.build()); + if (this.service instanceof BindingProvider) { + log.info(String.format("setting timeouts for %s: read timeout (%s) and connect timeout (%s)", + BindingProvider.class, serviceReadTimeout, serviceConnectionTimeout)); + final Map requestContext = ((BindingProvider) service).getRequestContext(); + + // can't be sure about which will be used. Set them all. 
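+ // the properties cover the JDK-bundled JAX-WS RI (com.sun.xml.internal.ws),
+ // the standalone RI (com.sun.xml.ws) and the generic javax.xml.ws variants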
+ requestContext.put("com.sun.xml.internal.ws.request.timeout", serviceReadTimeout); + requestContext.put("com.sun.xml.internal.ws.connect.timeout", serviceConnectionTimeout); + + requestContext.put("com.sun.xml.ws.request.timeout", serviceReadTimeout); + requestContext.put("com.sun.xml.ws.connect.timeout", serviceConnectionTimeout); + + requestContext.put("javax.xml.ws.client.receiveTimeout", serviceReadTimeout); + requestContext.put("javax.xml.ws.client.connectionTimeout", serviceConnectionTimeout); + } + + this.resultSetReadTimeout = resultSetReadTimeout; + this.resultSetConnectionTimeout = resultSetConnectionTimeout; + this.resultSetPageSize = resultSetPageSize; + } + + + //------------------------ GETTERS ------------------------- + + public T getService() { + return service; + } + + + public long getResultSetReadTimeout() { + return resultSetReadTimeout; + } + + + public long getResultSetConnectionTimeout() { + return resultSetConnectionTimeout; + } + + + public int getResultSetPageSize() { + return resultSetPageSize; + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java new file mode 100644 index 0000000000..c156ae1cc8 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java @@ -0,0 +1,17 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * ISLookup service facade. + * + * @author mhorst + * + */ +public interface ISLookupFacade { + + /** + * Provides all profiles matching given query + * @param xPathQuery XPath query + */ + Iterable searchProfile(String xPathQuery) throws ServiceFacadeException; + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java new file mode 100644 index 0000000000..f50b02b980 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java @@ -0,0 +1,17 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * MDStore service facade. + * + * @author mhorst + * + */ +public interface MDStoreFacade { + + /** + * Delivers all records for given MDStore identifier + * @param mdStoreId MDStore identifier + */ + Iterable deliverMDRecords(String mdStoreId) throws ServiceFacadeException; + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java new file mode 100644 index 0000000000..0e1aa19ef9 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java @@ -0,0 +1,19 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * ObjectStore service facade. + * + * @author mhorst + * + */ +public interface ObjectStoreFacade { + + /** + * Returns metadata records from given objectstore created in specified time range. 
+ * @param objectStoreId object store identifier + * @param from from time in millis + * @param until until time in millis + */ + Iterable deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException; + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java new file mode 100644 index 0000000000..9776306fa2 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java @@ -0,0 +1,27 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +/** + * Service facade generic exception. + * + * @author mhorst + * + */ +public class ServiceFacadeException extends Exception { + + private static final long serialVersionUID = 0L; + + //------------------------ CONSTRUCTORS ------------------- + + public ServiceFacadeException(String message, Throwable cause) { + super(message, cause); + } + + public ServiceFacadeException(String message) { + super(message); + } + + public ServiceFacadeException(Throwable cause) { + super(cause); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java new file mode 100644 index 0000000000..94b9307c47 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java @@ -0,0 +1,20 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import java.util.Map; + +/** + * Generic service facade factory. All implementations must be instantiable with no-argument construtor. + * + * @author mhorst + * + */ +public interface ServiceFacadeFactory { + + /** + * Creates service of given type configured with parameters. + * + * @param parameters service configuration + * + */ + T instantiate(Map parameters); +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java new file mode 100644 index 0000000000..53a76d761d --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java @@ -0,0 +1,80 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_FACADE_FACTORY_CLASS; + +import java.lang.reflect.Constructor; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import com.google.common.collect.ImmutableMap; + +import eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters; + +/** + * Service facade utility methods. + * @author mhorst + * + */ +public final class ServiceFacadeUtils { + + //------------------------ CONSTRUCTORS ------------------- + + private ServiceFacadeUtils() {} + + //------------------------ LOGIC -------------------------- + + /** + * Instantiates service based on provided parameters. + * + * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} parameter. + * Other parameters will be used by factory itself. Factory must be instantiable with no-argument construtor. 
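+ * <p>Usage sketch (illustrative; a real configuration also carries the
+ * factory-specific entries such as the service location):</p>
+ * <pre>
+ * {@code
+ * Map<String, String> params = ImmutableMap.of(IMPORT_FACADE_FACTORY_CLASS,
+ *         "eu.dnetlib.dhp.wf.importer.facade.WebServiceISLookupFacadeFactory");
+ * ISLookupFacade facade = ServiceFacadeUtils.instantiate(params);
+ * }
+ * </pre>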
+ * + * @param parameters set of parameters required for service instantiation + * + */ + public static <T> T instantiate(Map<String, String> parameters) throws ServiceFacadeException { + String serviceFactoryClassName = parameters.get(IMPORT_FACADE_FACTORY_CLASS); + if (StringUtils.isBlank(serviceFactoryClassName)) { + throw new ServiceFacadeException("unknown service facade factory, no " + IMPORT_FACADE_FACTORY_CLASS + " parameter provided!"); + } + try { + Class<?> clazz = Class.forName(serviceFactoryClassName); + Constructor<?> constructor = clazz.getConstructor(); + @SuppressWarnings("unchecked") + ServiceFacadeFactory<T> serviceFactory = (ServiceFacadeFactory<T>) constructor.newInstance(); + return serviceFactory.instantiate(parameters); + } catch (Exception e) { + throw new ServiceFacadeException("exception occurred while instantiating service by facade factory: " + serviceFactoryClassName, e); + } + + } + + /** + * Instantiates a service based on the provided configuration. + * + * The service factory class name is mandatory and has to be provided as the {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} configuration entry. + * All other entries are passed on to the factory itself. The factory must be instantiable with a no-argument constructor. + * + * @param config set of configuration entries required for service instantiation + */ + public static <T> T instantiate(Configuration config) throws ServiceFacadeException { + return instantiate(buildParameters(config)); + } + + + // ------------------------ PRIVATE -------------------------- + + /** + * Converts configuration entries into a plain map. + */ + private static Map<String, String> buildParameters(Configuration config) { + ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); + for (Map.Entry<String, String> entry : config) { + builder.put(entry); + } + return builder.build(); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java new file mode 100644 index 0000000000..7c787f2f8c --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java @@ -0,0 +1,55 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import java.util.Collections; + +import org.apache.log4j.Logger; + +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; + +/** + * WebService based ISLookup facade.
+ * + * @author mhorst + * + */ +public class WebServiceISLookupFacade extends AbstractResultSetAwareWebServiceFacade<ISLookUpService> implements ISLookupFacade { + + private static final Logger log = Logger.getLogger(WebServiceISLookupFacade.class); + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * @param serviceLocation ISLookup service location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout read timeout of the result set providing lookup results + * @param resultSetConnectionTimeout result set connection timeout + * @param resultSetPageSize result set data chunk size + */ + public WebServiceISLookupFacade(String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + super(ISLookUpService.class, serviceLocation, + serviceReadTimeout, serviceConnectionTimeout, + resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException { + try { + return getService().quickSearchProfile(xPathQuery); + } catch (ISLookUpDocumentNotFoundException e) { + log.error("unable to find profile for query: " + xPathQuery, e); + return Collections.emptyList(); + } catch (ISLookUpException e) { + throw new ServiceFacadeException("searching profiles in ISLookup failed with query '" + xPathQuery + "'", e); + } + + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java new file mode 100644 index 0000000000..6557ead948 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; + +import java.util.Map; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; + +/** + * WebService ISLookup service facade factory.
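+ * <p>Expects the ISLookup service location under the {@code IMPORT_ISLOOKUP_SERVICE_LOCATION} parameter; the service and result set read/connection timeouts and the result set page size fall back to their default values when not provided.</p>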
+ * + * @author mhorst + * + */ +public class WebServiceISLookupFacadeFactory implements ServiceFacadeFactory { + + + //------------------------ LOGIC -------------------------- + + @Override + public ISLookupFacade instantiate(Map parameters) { + Preconditions.checkArgument(parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION), + "unknown ISLookup service location: no parameter provided: '%s'", IMPORT_ISLOOKUP_SERVICE_LOCATION); + + return new WebServiceISLookupFacade(parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java new file mode 100644 index 0000000000..d37d020ed9 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import javax.xml.ws.wsaddressing.W3CEndpointReference; + +import eu.dnetlib.data.mdstore.MDStoreService; +import eu.dnetlib.data.mdstore.MDStoreServiceException; +import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; +import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; + +/** + * WebService based MDStore facade. 
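+ * <p>Records are not returned directly: the MDStore service responds with a {@code W3CEndpointReference} pointing at a D-Net result set, which is consumed page by page through a {@code ResultSetClientFactory} configured with the result set page size and timeouts.</p>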
+ * + * @author mhorst + * + */ +public class WebServiceMDStoreFacade extends AbstractResultSetAwareWebServiceFacade implements MDStoreFacade { + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * @param serviceLocation MDStore webservice location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout resultset read timeout + * @param resultSetConnectionTimeout result set connection timeout + * @param resultSetPageSize resultset page size + */ + public WebServiceMDStoreFacade(String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + super(MDStoreService.class, serviceLocation, + serviceReadTimeout, serviceConnectionTimeout, + resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Iterable deliverMDRecords(String mdStoreId) throws ServiceFacadeException { + try { + W3CEndpointReference eprResult = getService().deliverMDRecords(mdStoreId, null, null, null); + ResultSetClientFactory rsFactory = new ResultSetClientFactory( + getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout()); + rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); + return rsFactory.getClient(eprResult); + } catch (MDStoreServiceException e) { + throw new ServiceFacadeException("delivering records for md store " + mdStoreId + " failed!", e); + } + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java new file mode 100644 index 0000000000..00bb0c3f74 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java @@ -0,0 +1,45 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_SERVICE_LOCATION; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; + +import java.util.Map; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; + +/** + * WebService MDStore service facade factory. 
+ * + * @author mhorst + * + */ +public class WebServiceMDStoreFacadeFactory implements ServiceFacadeFactory { + + + //------------------------ LOGIC -------------------------- + + @Override + public WebServiceMDStoreFacade instantiate(Map parameters) { + Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_SERVICE_LOCATION), + "unknown MDStore service location: no parameter provided: '%s'", IMPORT_MDSTORE_SERVICE_LOCATION); + + return new WebServiceMDStoreFacade(parameters.get(IMPORT_MDSTORE_SERVICE_LOCATION), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java new file mode 100644 index 0000000000..6e1aee80b5 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java @@ -0,0 +1,52 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import javax.xml.ws.wsaddressing.W3CEndpointReference; + +import eu.dnetlib.data.objectstore.rmi.ObjectStoreService; +import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException; +import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; +import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; + +/** + * WebService based ObjectStore facade. 
+ * + * @author mhorst + * + */ +public class WebServiceObjectStoreFacade extends AbstractResultSetAwareWebServiceFacade implements ObjectStoreFacade { + + + //------------------------ CONSTRUCTORS ------------------- + + /** + * @param serviceLocation ObjectStore webservice location + * @param serviceReadTimeout service read timeout + * @param serviceConnectionTimeout service connection timeout + * @param resultSetReadTimeout resultset read timeout + * @param resultSetConnectionTimeout result set connection timeout + * @param resultSetPageSize resultset page size + */ + public WebServiceObjectStoreFacade(String serviceLocation, + long serviceReadTimeout, long serviceConnectionTimeout, + long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { + super(ObjectStoreService.class, serviceLocation, + serviceReadTimeout, serviceConnectionTimeout, + resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Iterable deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException { + try { + W3CEndpointReference eprResult = getService().deliverObjects(objectStoreId, from, until); + ResultSetClientFactory rsFactory = new ResultSetClientFactory( + getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout()); + rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); + return rsFactory.getClient(eprResult); + } catch (ObjectStoreServiceException e) { + throw new ServiceFacadeException("delivering records for object store " + objectStoreId + " failed!", e); + } + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java new file mode 100644 index 0000000000..9c77c45464 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java @@ -0,0 +1,44 @@ +package eu.dnetlib.dhp.wf.importer.facade; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; + +import java.util.Map; + +import com.google.common.base.Preconditions; + +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; + +/** + * WebService ObjectStore facade factory. 
+ * + * @author mhorst + * + */ +public class WebServiceObjectStoreFacadeFactory implements ServiceFacadeFactory<WebServiceObjectStoreFacade> { + + //------------------------ LOGIC -------------------------- + + @Override + public WebServiceObjectStoreFacade instantiate(Map<String, String> parameters) { + Preconditions.checkArgument(parameters.containsKey(IMPORT_CONTENT_OBJECT_STORE_LOC), + "unknown object store service location: no parameter provided: '%s'", IMPORT_CONTENT_OBJECT_STORE_LOC); + + return new WebServiceObjectStoreFacade(parameters.get(IMPORT_CONTENT_OBJECT_STORE_LOC), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), + Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), + Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java new file mode 100644 index 0000000000..5d61f06a5c --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java @@ -0,0 +1,94 @@ +package eu.dnetlib.dhp.wf.importer.mdrecord; + +import java.util.Stack; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import eu.dnetlib.dhp.common.InfoSpaceConstants; + +/** + * MDRecord SAX handler extracting the record identifier from the record header. + * + * The handler is stateful: {@link #startDocument()} resets its state, so a single instance can be reused across records, one record at a time.
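+ * <p>A minimal wiring sketch, mirroring how {@code MDStoreRecordsImporter} drives the handler (the {@code xml} string below is a placeholder for a single record):</p> + * <pre> + * SAXParserFactory parserFactory = SAXParserFactory.newInstance(); + * parserFactory.setNamespaceAware(true); + * SAXParser saxParser = parserFactory.newSAXParser(); + * MDRecordHandler handler = new MDRecordHandler(); + * saxParser.parse(new InputSource(new StringReader(xml)), handler); + * String recordId = handler.getRecordId(); + * </pre> + *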
+ * @author mhorst + * + */ +public class MDRecordHandler extends DefaultHandler { + + public static final String ELEM_OBJ_IDENTIFIER = "objIdentifier"; + + private static final String ELEM_HEADER = "header"; + + private Stack parents; + + private StringBuilder currentValue = new StringBuilder(); + + private String recordId; + + + // ------------------------ LOGIC -------------------------- + + @Override + public void startDocument() throws SAXException { + parents = new Stack(); + recordId = null; + } + + @Override + public void startElement(String uri, String localName, String qName, + Attributes attributes) throws SAXException { + if (this.recordId == null) { + if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) { +// identifierType attribute is mandatory + this.currentValue = new StringBuilder(); + } + this.parents.push(localName); + } + } + + @Override + public void endElement(String uri, String localName, String qName) + throws SAXException { + if (this.recordId == null) { + this.parents.pop(); + if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) { + this.recordId = InfoSpaceConstants.ROW_PREFIX_RESULT + this.currentValue.toString().trim(); + } +// resetting current value; + this.currentValue = null; + } + } + + @Override + public void endDocument() throws SAXException { + parents.clear(); + parents = null; + } + + @Override + public void characters(char[] ch, int start, int length) + throws SAXException { + if (this.currentValue!=null) { + this.currentValue.append(ch, start, length); + } + } + + /** + * @return record identifier + */ + public String getRecordId() { + return recordId; + } + + // ------------------------ PRIVATE -------------------------- + + private boolean isWithinElement(String localName, String expectedElement, String expectedParent) { + return localName.equals(expectedElement) && !this.parents.isEmpty() && + expectedParent.equals(this.parents.peek()); + } + + +} + diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java new file mode 100644 index 0000000000..4610938514 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java @@ -0,0 +1,157 @@ +package eu.dnetlib.dhp.wf.importer.mdrecord; + +import java.io.IOException; +import java.io.StringReader; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import com.google.common.base.Preconditions; +import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; +import eu.dnetlib.dhp.common.counter.NamedCounters; +import eu.dnetlib.dhp.common.counter.NamedCountersFileWriter; +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.io.DataStore; +import eu.dnetlib.dhp.common.java.io.FileSystemPath; +import eu.dnetlib.dhp.common.java.porttype.AvroPortType; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import eu.dnetlib.dhp.importer.schemas.ImportedRecord; +import eu.dnetlib.dhp.importer.schemas.RecordFormat; +import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade; +import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeUtils; +import org.apache.avro.file.DataFileWriter; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.FileSystem; +import org.apache.log4j.Logger; +import org.xml.sax.InputSource; + +import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_IDS_CSV; +import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_RECORD_MAXLENGTH; + +/** + * {@link MDStoreFacade} based metadata records importer. + * @author mhorst + * + */ +public class MDStoreRecordsImporter implements Process { + + protected static final String COUNTER_NAME_TOTAL = "TOTAL"; + + protected static final String COUNTER_NAME_SIZE_EXCEEDED = "SIZE_EXCEEDED"; + + protected static final String PORT_OUT_MDRECORDS = "mdrecords"; + + private static final Logger log = Logger.getLogger(MDStoreRecordsImporter.class); + + private final static int progressLogInterval = 100000; + + private final NamedCountersFileWriter countersWriter = new NamedCountersFileWriter(); + + private final Map outputPorts = new HashMap(); + + + //------------------------ CONSTRUCTORS ------------------- + + public MDStoreRecordsImporter() { + outputPorts.put(PORT_OUT_MDRECORDS, new AvroPortType(ImportedRecord.SCHEMA$)); + } + + //------------------------ LOGIC -------------------------- + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return outputPorts; + } + + @Override + public void run(PortBindings portBindings, Configuration conf, + Map parameters) throws Exception { + + Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_IDS_CSV), + "unknown mdstore identifier, required parameter '%s' is missing!", IMPORT_MDSTORE_IDS_CSV); + String mdStoreIdsCSV = parameters.get(IMPORT_MDSTORE_IDS_CSV); + int recordMaxLength = parameters.containsKey(IMPORT_MDSTORE_RECORD_MAXLENGTH)? 
+ Integer.parseInt(parameters.get(IMPORT_MDSTORE_RECORD_MAXLENGTH)):Integer.MAX_VALUE; + + NamedCounters counters = new NamedCounters(new String[] { COUNTER_NAME_TOTAL, COUNTER_NAME_SIZE_EXCEEDED }); + + if (StringUtils.isNotBlank(mdStoreIdsCSV) && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(mdStoreIdsCSV)) { + + String[] mdStoreIds = StringUtils.split(mdStoreIdsCSV, WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER); + + try (DataFileWriter recordWriter = getWriter(FileSystem.get(conf), portBindings)) { + + MDStoreFacade mdStoreFacade = ServiceFacadeUtils.instantiate(parameters); + + SAXParserFactory parserFactory = SAXParserFactory.newInstance(); + parserFactory.setNamespaceAware(true); + SAXParser saxParser = parserFactory.newSAXParser(); + MDRecordHandler mdRecordHandler = new MDRecordHandler(); + + long startTime = System.currentTimeMillis(); + int currentCount = 0; + + for (String mdStoreId : mdStoreIds) { + for (String mdRecord : mdStoreFacade.deliverMDRecords(mdStoreId)) { + if (!StringUtils.isEmpty(mdRecord)) { + if (mdRecord.length() <= recordMaxLength) { + saxParser.parse(new InputSource(new StringReader(mdRecord)), mdRecordHandler); + String recordId = mdRecordHandler.getRecordId(); + if (StringUtils.isNotBlank(recordId)) { + recordWriter.append( + ImportedRecord.newBuilder() + .setId(recordId) + .setBody(mdRecord) + .setFormat(RecordFormat.XML) + .build()); + counters.increment(COUNTER_NAME_TOTAL); + } else { + log.error("skipping, unable to extract identifier from record: " + mdRecord); + } + } else { + counters.increment(COUNTER_NAME_SIZE_EXCEEDED); + log.error("mdstore record maximum length (" + recordMaxLength + "): was exceeded: " + + mdRecord.length() + ", record content:\n" + mdRecord); + } + + } else { + log.error("got empty metadata record from mdstore: " + mdStoreId); + } + currentCount++; + if (currentCount % progressLogInterval == 0) { + log.info("current progress: " + currentCount + ", last package of " + progressLogInterval + + " processed in " + ((System.currentTimeMillis() - startTime) / 1000) + " secs"); + startTime = System.currentTimeMillis(); + } + } + } + log.info("total number of processed records: " + currentCount); + } + } + + if (counters.currentValue(COUNTER_NAME_TOTAL)==0) { + log.warn("parsed 0 metadata records from mdstores: " + mdStoreIdsCSV); + } + countersWriter.writeCounters(counters, System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); + + } + + /** + * Provides {@link ImportedRecord} writer consuming records. 
+ */ + protected DataFileWriter getWriter(FileSystem fs, PortBindings portBindings) throws IOException { + return DataStore.create( + new FileSystemPath(fs, portBindings.getOutput().get(PORT_OUT_MDRECORDS)), ImportedRecord.SCHEMA$); + } + +} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java new file mode 100644 index 0000000000..4776af22b0 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java @@ -0,0 +1,48 @@ +package eu.dnetlib.dhp.wf.importer.mdrecord; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import eu.dnetlib.dhp.common.java.PortBindings; +import eu.dnetlib.dhp.common.java.Process; +import eu.dnetlib.dhp.common.java.porttype.PortType; +import org.apache.hadoop.conf.Configuration; + +public class MongoRecordImporter implements Process { + + private final Map outputPorts = new HashMap(); + + @Override + public Map getInputPorts() { + return Collections.emptyMap(); + } + + @Override + public Map getOutputPorts() { + return outputPorts; + } + + @Override + public void run(final PortBindings portBindings, final Configuration conf, final Map parameters) throws Exception { + + /* + SparkSession spark = SparkSession.builder() + .master("local") + .appName("MongoSparkConnectorIntro") + .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.myCollection") + .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.myCollection") + .getOrCreate(); + + // Create a JavaSparkContext using the SparkSession's SparkContext object + JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); + + // More application logic would go here... 
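+ // For example, the collection could be loaded as an RDD at this point + // (a sketch based on the mongo-spark-connector API; the target collection + // is whatever the spark.mongodb.input.uri above points at): + // JavaMongoRDD<Document> rdd = MongoSpark.load(jsc); + // long recordCount = rdd.count();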
+ + jsc.close(); + */ + + } + + +} diff --git a/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml new file mode 100644 index 0000000000..dee61af910 --- /dev/null +++ b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml @@ -0,0 +1,124 @@ + + + + + mdstore_facade_factory_classname + eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory + ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade + + + mdstore_service_location + $UNDEFINED$ + mdstore service location + + + mdstore_ids_csv + $UNDEFINED$ + comma separated mdstore identifiers + + + mdstore_record_maxlength + 500000 + maximum allowed length of mdstore record + + + output + ImportedRecord avro datastore output holding mdrecords + + + output_report_root_path + base directory for storing reports + + + output_report_relative_path + import_mdrecord + directory for storing report (relative to output_report_root_path) + + + dnet_service_client_read_timeout + 60000 + DNet service client reading timeout (expressed in milliseconds) + + + dnet_service_client_connection_timeout + 60000 + DNet service client connection timeout (expressed in milliseconds) + + + resultset_client_read_timeout + 60000 + result set client reading timeout (expressed in milliseconds) + + + resultset_client_connection_timeout + 60000 + result set client connection timeout (expressed in milliseconds) + + + report_properties_prefix + import.mdrecord + report entry related to total number of imported records + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + + + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter + + -Pimport.mdstore.service.location=${mdstore_service_location} + -Pimport.mdstore.ids.csv=${mdstore_ids_csv} + -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength} + + -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout} + -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout} + -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout} + -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout} + + -Pimport.facade.factory.classname=${mdstore_facade_factory_classname} + -Omdrecords=${output} + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.common.report.ReportGenerator + -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']} + -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']} + -Oreport=${output_report_root_path}/${output_report_relative_path} + + + + + + + Unfortunately, the process failed -- error message: + [${wf:errorMessage(wf:lastErrorNode())}] + + + diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class new file mode 100644 index 0000000000..5bd02d3603 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class differ diff --git 
a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class new file mode 100644 index 0000000000..46f329580a Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class new file mode 100644 index 0000000000..13a46a67ad Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class new file mode 100644 index 0000000000..fb8394cb5a Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class new file mode 100644 index 0000000000..abb39ad860 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class new file mode 100644 index 0000000000..7689db9d69 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class new file mode 100644 index 0000000000..ec0403005b Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class new file mode 100644 index 0000000000..e9e7a2cbdf Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class new file mode 100644 index 0000000000..03ce390c2d Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class new file mode 100644 index 0000000000..18e211bf9c Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class differ diff --git 
a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class new file mode 100644 index 0000000000..d7663b0dd8 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class new file mode 100644 index 0000000000..aff40623b0 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class new file mode 100644 index 0000000000..c9ac132382 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class new file mode 100644 index 0000000000..cbd0403cbe Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class new file mode 100644 index 0000000000..b7c9aaee6c Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class new file mode 100644 index 0000000000..8ffb1d81bb Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class new file mode 100644 index 0000000000..b6a1ceca93 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class new file mode 100644 index 0000000000..092c87c701 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class new file mode 100644 index 0000000000..254a7f6478 Binary files /dev/null and 
b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class new file mode 100644 index 0000000000..5079c03852 Binary files /dev/null and b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml new file mode 100644 index 0000000000..dee61af910 --- /dev/null +++ b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml @@ -0,0 +1,124 @@ + + + + + mdstore_facade_factory_classname + eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory + ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade + + + mdstore_service_location + $UNDEFINED$ + mdstore service location + + + mdstore_ids_csv + $UNDEFINED$ + comma separated mdstore identifiers + + + mdstore_record_maxlength + 500000 + maximum allowed length of mdstore record + + + output + ImportedRecord avro datastore output holding mdrecords + + + output_report_root_path + base directory for storing reports + + + output_report_relative_path + import_mdrecord + directory for storing report (relative to output_report_root_path) + + + dnet_service_client_read_timeout + 60000 + DNet service client reading timeout (expressed in milliseconds) + + + dnet_service_client_connection_timeout + 60000 + DNet service client connection timeout (expressed in milliseconds) + + + resultset_client_read_timeout + 60000 + result set client reading timeout (expressed in milliseconds) + + + resultset_client_connection_timeout + 60000 + result set client connection timeout (expressed in milliseconds) + + + report_properties_prefix + import.mdrecord + report entry related to total number of imported records + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + + + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter + + -Pimport.mdstore.service.location=${mdstore_service_location} + -Pimport.mdstore.ids.csv=${mdstore_ids_csv} + -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength} + + -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout} + -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout} + -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout} + -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout} + + -Pimport.facade.factory.classname=${mdstore_facade_factory_classname} + -Omdrecords=${output} + + + + + + + + + eu.dnetlib.dhp.common.java.ProcessWrapper + eu.dnetlib.dhp.common.report.ReportGenerator + -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']} + -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']} + -Oreport=${output_report_root_path}/${output_report_relative_path} + + + + + + + Unfortunately, the process failed -- error message: + [${wf:errorMessage(wf:lastErrorNode())}] + + + diff 
--git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000000..672248f223 --- /dev/null +++ b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,20 @@ +eu/dnetlib/dhp/wf/importer/RecordReceiver.class +eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class +eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class +eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class +eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class +eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class +eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class +eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class +eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class +eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class +eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class +eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class +eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000000..e9820d1d98 --- /dev/null +++ b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,20 @@ +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java 
+/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java +/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java diff --git a/dhp-wf/pom.xml b/dhp-wf/pom.xml new file mode 100644 index 0000000000..4c0aa666ac --- /dev/null +++ b/dhp-wf/pom.xml @@ -0,0 +1,249 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + + dhp-wf + pom + + + dhp-wf-import + + + + yyyy-MM-dd_HH_mm + + + oozie-package + + src/test/resources/define/path/pointing/to/directory/holding/oozie_app + oozie_app + default + default + default + primed + + runtime + + true + + ${user.home}/.dhp/application.properties + + ${maven.build.timestamp} + + ${project.version} + true + + + + + + + org.apache.oozie + oozie-client + + + net.schmizz + sshj + test + + + + + + oozie-package + + + + org.apache.maven.plugins + maven-enforcer-plugin + 1.4.1 + + + enforce-connection-properties-file-existence + initialize + + enforce + + + + + + ${dhpConnectionProperties} + + + The file with connection properties could not be found. Please, create the ${dhpConnectionProperties} file or set the location to another already created file by using + -DdhpConnectionProperties property. 
+ + + + true + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy dependencies + prepare-package + + copy-dependencies + + + ${oozie.package.dependencies.include.scope} + ${oozie.package.dependencies.exclude.scope} + true + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + attach-test-resources-package + prepare-package + + test-jar + + + ${oozie.package.skip.test.jar} + + + + + + + + pl.project13.maven + git-commit-id-plugin + 2.1.11 + + + + revision + + + + + true + yyyy-MM-dd'T'HH:mm:ssZ + true + target/${oozie.package.file.name}/${oozieAppDir}/version.properties + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.0.0 + + + assembly-oozie-installer + package + + single + + + false + ${oozie.package.file.name}_shell_scripts + + oozie-installer + + + + + + + + + + maven-antrun-plugin + + + + installer-copy-custom + process-resources + + run + + + + + + + + + + + package + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + run + + + + + + + + + + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000000..8861cbe25f --- /dev/null +++ b/pom.xml @@ -0,0 +1,953 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + pom + + http://www.d-net.research-infrastructures.eu + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + dhp-build + dhp-common + dhp-schemas + dhp-wf + + + + Redmine + https://issue.openaire.research-infrastructures.eu/projects/openaire + + + + jenkins + https://jenkins-dnet.d4science.org/ + + + + scm:git:ssh://git@github.com/??? + scm:git:ssh://git@github.com/???.git + https://github.com/??? + HEAD + + + + + + + + + dnet45-releases + D-Net 45 releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + default + + false + + + true + + + + dnet45-bootstrap-release + dnet45 bootstrap release + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-bootstrap-release + default + + false + + + true + + + + + cloudera + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos + + true + + + false + + + + + + + + org.slf4j + slf4j-api + 1.7.22 + + + org.slf4j + slf4j-log4j12 + 1.7.22 + + + log4j + log4j + 1.2.17 + + + javax.servlet + javax.servlet-api + 3.1.0 + runtime + + + junit + junit + 4.12 + test + + + org.hamcrest + hamcrest-all + 1.3 + test + + + org.mockito + mockito-all + 1.10.19 + test + + + org.powermock + powermock-core + 1.6.6 + test + + + com.google.code.findbugs + annotations + 3.0.1 + provided + + + com.google.code.findbugs + jsr305 + 3.0.1 + provided + + + + + + + + org.apache.hadoop + hadoop-common + ${dhp.hadoop.version} + provided + + + servlet-api + javax.servlet + + + + + + org.apache.hadoop + hadoop-yarn-common + ${dhp.hadoop.version} + provided + + + org.apache.hadoop + hadoop-mapreduce-client-common + ${dhp.hadoop.version} + provided + + + + org.apache.hadoop + hadoop-yarn-common + ${dhp.hadoop.version} + test + test-jar + + + + org.apache.hadoop + hadoop-mapreduce-client-common + ${dhp.hadoop.version} + test + test-jar + + + + org.apache.hadoop + hadoop-mapreduce-client-app + ${dhp.hadoop.version} + provided + + + servlet-api + javax.servlet + + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${dhp.hadoop.version} + provided + + + servlet-api + javax.servlet + + + + org.apache.calcite + calcite-core + + + org.apache.calcite + calcite-avatica + + + + + + org.apache.hadoop + hadoop-hdfs + 
${dhp.hadoop.version} + + + + org.apache.hadoop + hadoop-hdfs + ${dhp.hadoop.version} + test-jar + test + + + + + + org.apache.oozie + oozie-core + ${dhp.oozie.version} + + + + servlet-api + javax.servlet + + + + org.apache.calcite + calcite-core + + + org.apache.calcite + calcite-avatica + + + + + + org.apache.oozie + oozie-client + ${dhp.oozie.version} + + + + slf4j-simple + org.slf4j + + + + + + org.apache.spark + spark-core_2.10 + ${dhp.spark.version} + provided + + + + org.apache.spark + spark-sql_2.10 + ${dhp.spark.version} + provided + + + + com.databricks + spark-avro_2.10 + 1.1.0-${dhp.cdh.version} + + + + com.databricks + spark-csv_2.10 + 1.5.0 + + + + pl.edu.icm.spark-utils + spark-utils + 1.0.0 + + + + org.mongodb.spark + mongo-spark-connector_2.10 + 2.2.1 + provided + + + + + + org.apache.avro + avro + ${dhp.avro.version} + + + + org.apache.avro + avro-mapred + ${dhp.avro.version} + hadoop2 + + + + servlet-api + org.mortbay.jetty + + + netty + io.netty + + + + + + + + org.apache.pig + pig + ${dhp.pig.version} + provided + + + org.mortbay.jetty + servlet-api-2.5 + + + servlet-api + javax.servlet + + + + + + org.apache.pig + piggybank + ${dhp.pig.version} + provided + + + + org.apache.pig + pigunit + ${dhp.pig.version} + + + + + pl.edu.icm.cermine + cermine-impl + 1.13 + + + + + pl.edu.icm.coansys + models + ${dhp.coansys.version} + + + + pl.edu.icm.coansys + citation-matching-core-code + ${dhp.coansys.version} + + + + org.apache.avro + avro-mapred + + + org.apache.hadoop + hadoop-client + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-mapreduce-client-common + + + org.apache.hadoop + hadoop-mapreduce-client-app + + + org.apache.hadoop + hadoop-mapreduce-client-core + + + + org.slf4j + slf4j-simple + + + + + + + + eu.dnetlib + dnet-openaireplus-mapping-utils + [6.0.0, 7.0.0) + + + + org.apache.hadoop + hadoop-core + + + org.apache.hadoop + hadoop-hdfs + + + org.apache.zookeeper + zookeeper + + + jgrapht + jgrapht + + + + + + eu.dnetlib + dnet-objectstore-rmi + [2.0.0, 3.0.0) + + + + eu.dnetlib + cnr-rmi-api + [2.0.0, 3.0.0) + + + + eu.dnetlib + cnr-resultset-client + [2.0.0, 3.0.0) + + + + org.springframework + spring-web + + + org.springframework + spring-webmvc + + + + + + eu.dnetlib + dnet-actionmanager-common + [6.0.0, 7.0.0) + + + + apache + commons-logging + + + + + + eu.dnetlib + cnr-service-utils + [1.0.0, 2.0.0) + + + + + com.beust + jcommander + 1.60 + + + + com.google.code.gson + gson + 2.8.0 + + + + com.googlecode.json-simple + json-simple + 1.1.1 + + + + org.apache.commons + commons-lang3 + 3.5 + + + + org.apache.commons + commons-collections4 + 4.1 + + + + com.thoughtworks.xstream + xstream + 1.4.9 + + + + xalan + xalan + 2.7.2 + + + + xml-apis + xml-apis + 1.4.01 + + + + org.jdom + jdom + 1.1.3 + + + + org.jsoup + jsoup + 1.10.2 + + + + net.sf.opencsv + opencsv + 2.3 + + + + com.googlecode.protobuf-java-format + protobuf-java-format + 1.2 + + + + com.google.protobuf + protobuf-java + 2.5.0 + + + + + com.google.guava + guava + 12.0 + + + + commons-cli + commons-cli + 1.3.1 + + + + commons-io + commons-io + 2.5 + + + + de.sven-jacobs + loremipsum + 1.0 + + + + net.schmizz + sshj + 0.10.0 + + + + + org.bouncycastle + bcprov-jdk15on + 1.50 + + + + + com.thoughtworks.paranamer + paranamer + 2.8 + + + + + com.linkedin.datafu + datafu + 1.2.0 + + + + org.apache.cxf + cxf-rt-frontend-jaxws + ${cxf.version} + + + + + com.sun.xml.bind + jaxb-impl + 2.2.7 + runtime + + + + org.springframework + 
spring-beans + ${spring.version} + + + + org.springframework + spring-context + ${spring.version} + + + + org.scala-lang + scala-library + ${scala.version} + + + + commons-beanutils + commons-beanutils + 1.9.3 + + + + org.apache.curator + curator-test + 3.3.0 + test + + + + + + + + target + target/classes + ${project.artifactId}-${project.version} + target/test-classes + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.6.0 + + 1.8 + 1.8 + ${project.build.sourceEncoding} + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.0.2 + + + + org.apache.maven.plugins + maven-source-plugin + 3.0.1 + + + attach-sources + verify + + jar-no-fork + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.19.1 + + true + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + true + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.0.0 + + + + net.alchim31.maven + scala-maven-plugin + 3.2.2 + + + scala-compile-first + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + + + + org.apache.avro + avro-maven-plugin + 1.7.7 + + + + org.codehaus.mojo + build-helper-maven-plugin + 1.12 + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + + org.apache.avro + + + avro-maven-plugin + + + [1.7.4,) + + + idl-protocol + schema + + + + + + + + + + org.codehaus.mojo + + + build-helper-maven-plugin + + + [1.7,) + + + add-source + + + + + + + + + + org.apache.maven.plugins + + + maven-plugin-plugin + + + [3.2,) + + + descriptor + + + + + + + + + + + + + + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.3 + + + + org.jacoco + jacoco-maven-plugin + 0.7.9 + + + **/schemas/* + **/com/cloudera/**/* + **/org/apache/avro/io/**/* + + + + + default-prepare-agent + + prepare-agent + + + + default-report + prepare-package + + report + + + + + + + + + org.apache.maven.wagon + wagon-ssh + 2.10 + + + + + + + + + dnet45-snapshots + DNet45 Snapshots + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots + default + + + dnet45-releases + http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.10.4 + + true + + + + + + + UTF-8 + UTF-8 + [4.2.5.RELEASE] + [3.1.5] + + cdh5.9.0 + + 4.1.0-${dhp.cdh.version} + 0.12.0-${dhp.cdh.version} + 1.7.6-${dhp.cdh.version} + 2.6.0-${dhp.cdh.version} + 1.6.0-${dhp.cdh.version} + 2.10.6 + + +