diff --git a/dhp-build/dhp-build-assembly-resources/README.markdown b/dhp-build/dhp-build-assembly-resources/README.markdown deleted file mode 100644 index efee5fa45..000000000 --- a/dhp-build/dhp-build-assembly-resources/README.markdown +++ /dev/null @@ -1,7 +0,0 @@ -Module utilized by `dhp-wf`. - -Contains all required resources by this parent module: - -* assembly XML definitions -* build shell scripts -* oozie package commands for uploading, running and monitoring oozie workflows diff --git a/dhp-build/dhp-build-assembly-resources/pom.xml b/dhp-build/dhp-build-assembly-resources/pom.xml deleted file mode 100644 index 2d2543505..000000000 --- a/dhp-build/dhp-build-assembly-resources/pom.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - 4.0.0 - - - eu.dnetlib.dhp - dhp-build - 1.0.0-SNAPSHOT - - - dhp-build-assembly-resources - jar - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - - diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml deleted file mode 100644 index 1419c5b1c..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/oozie-installer.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - oozie-installer - - dir - - - - - true - ${project.build.directory}/assembly-resources/commands - - / - - **/* - - 0755 - unix - - - / - diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml deleted file mode 100644 index bf679e652..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/assemblies/tests.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - tests - - jar - - false - - - ${project.build.testOutputDirectory} - - - - - - \ No newline at end of file diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh deleted file mode 100644 index e9d55f0d7..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/get_working_dir.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -hadoop fs -get ${workingDir} - diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh deleted file mode 100644 index c79839ea4..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/print_working_dir.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -echo "" -echo "---->Contents of the working directory" -hadoop fs -ls ${workingDir} - diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown deleted file mode 100644 index 3e049c18b..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/readme.markdown +++ /dev/null @@ -1,5 +0,0 @@ -Execute the scripts in the following order: - -1. `upload_workflow.sh` -2. `run_workflow.sh` -3. 
`print_working_dir.sh` or `get_working_dir.sh` diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh deleted file mode 100644 index fee3d7737..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/run_workflow.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -if [ $# = 0 ] ; then - oozie job -oozie ${oozieServiceLoc} -config job.properties -run -else - oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run -fi - - - diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh deleted file mode 100644 index c5d299c2f..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/commands/upload_workflow.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -exec 3>&1 -BASH_XTRACEFD=3 -set -x ## print every executed command - - -if [ $# = 0 ] ; then - target_dir_root=`pwd`'/${oozieAppDir}' -else - target_dir_root=`readlink -f $1`'/${oozieAppDir}' -fi - -# initial phase, creating symbolic links to jars in all subworkflows -# currently disabled -#libDir=$target_dir_root'/lib' -#dirs=`find $target_dir_root/* -maxdepth 10 -type d` -#for dir in $dirs -#do -# if [ -f $dir/workflow.xml ] -# then -# echo "creating symbolic links to jars in directory: $dir/lib" -# if [ ! -d "$dir/lib" ]; then -# mkdir $dir/lib -# fi -# find $libDir -type f -exec ln -s \{\} $dir/lib \; -# fi -#done - - -#uploading -hadoop fs -rm -r ${sandboxDir} -hadoop fs -mkdir -p ${sandboxDir} -hadoop fs -mkdir -p ${workingDir} -hadoop fs -put $target_dir_root ${sandboxDir} diff --git a/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties b/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties deleted file mode 100644 index 021ecf55b..000000000 --- a/dhp-build/dhp-build-assembly-resources/src/main/resources/project-default.properties +++ /dev/null @@ -1,7 +0,0 @@ -#sandboxName when not provided explicitly will be generated -sandboxName=${sandboxName} -sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName} -workingDir=${sandboxDir}/working_dir -oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} -oozieTopWfApplicationPath = ${oozie.wf.application.path} - diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml deleted file mode 100644 index 1419c5b1c..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/oozie-installer.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - oozie-installer - - dir - - - - - true - ${project.build.directory}/assembly-resources/commands - - / - - **/* - - 0755 - unix - - - / - diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml b/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml deleted file mode 100644 index bf679e652..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/assemblies/tests.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - tests - - jar - - false - - - ${project.build.testOutputDirectory} - - - - - - \ No newline at end of file diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh 
b/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh deleted file mode 100644 index e9d55f0d7..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/commands/get_working_dir.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -hadoop fs -get ${workingDir} - diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh deleted file mode 100644 index c79839ea4..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/commands/print_working_dir.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash -echo "" -echo "---->Contents of the working directory" -hadoop fs -ls ${workingDir} - diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown b/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown deleted file mode 100644 index 3e049c18b..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/commands/readme.markdown +++ /dev/null @@ -1,5 +0,0 @@ -Execute the scripts in the following order: - -1. `upload_workflow.sh` -2. `run_workflow.sh` -3. `print_working_dir.sh` or `get_working_dir.sh` diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh deleted file mode 100644 index fee3d7737..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/commands/run_workflow.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -if [ $# = 0 ] ; then - oozie job -oozie ${oozieServiceLoc} -config job.properties -run -else - oozie job -oozie ${oozieServiceLoc} -config $1/job.properties -run -fi - - - diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh b/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh deleted file mode 100644 index c5d299c2f..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/commands/upload_workflow.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -exec 3>&1 -BASH_XTRACEFD=3 -set -x ## print every executed command - - -if [ $# = 0 ] ; then - target_dir_root=`pwd`'/${oozieAppDir}' -else - target_dir_root=`readlink -f $1`'/${oozieAppDir}' -fi - -# initial phase, creating symbolic links to jars in all subworkflows -# currently disabled -#libDir=$target_dir_root'/lib' -#dirs=`find $target_dir_root/* -maxdepth 10 -type d` -#for dir in $dirs -#do -# if [ -f $dir/workflow.xml ] -# then -# echo "creating symbolic links to jars in directory: $dir/lib" -# if [ ! 
-d "$dir/lib" ]; then -# mkdir $dir/lib -# fi -# find $libDir -type f -exec ln -s \{\} $dir/lib \; -# fi -#done - - -#uploading -hadoop fs -rm -r ${sandboxDir} -hadoop fs -mkdir -p ${sandboxDir} -hadoop fs -mkdir -p ${workingDir} -hadoop fs -put $target_dir_root ${sandboxDir} diff --git a/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties b/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties deleted file mode 100644 index 021ecf55b..000000000 --- a/dhp-build/dhp-build-assembly-resources/target/classes/project-default.properties +++ /dev/null @@ -1,7 +0,0 @@ -#sandboxName when not provided explicitly will be generated -sandboxName=${sandboxName} -sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName} -workingDir=${sandboxDir}/working_dir -oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir} -oozieTopWfApplicationPath = ${oozie.wf.application.path} - diff --git a/dhp-build/dhp-build-properties-maven-plugin/README.markdown b/dhp-build/dhp-build-properties-maven-plugin/README.markdown deleted file mode 100644 index f99c7c1b0..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/README.markdown +++ /dev/null @@ -1,6 +0,0 @@ -Maven plugin module utilized by `dhp-wf` for proper `job.properties` file building. - -It is based on http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html and supplemented with: - -* handling includePropertyKeysFromFiles property allowing writing only properties listed in given property files -As a final outcome only properties listed in `` element and listed as a keys in files from `` element will be written to output file. diff --git a/dhp-build/dhp-build-properties-maven-plugin/pom.xml b/dhp-build/dhp-build-properties-maven-plugin/pom.xml deleted file mode 100644 index 38093f4d1..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/pom.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - 4.0.0 - - - eu.dnetlib.dhp - dhp-build - 1.0.0-SNAPSHOT - - - dhp-build-properties-maven-plugin - maven-plugin - - - - - org.apache.maven - maven-plugin-api - 2.0 - - - org.apache.maven - maven-project - 2.0 - - - org.kuali.maven.plugins - properties-maven-plugin - 1.3.2 - - - - - - target - target/classes - ${project.artifactId}-${project.version} - target/test-classes - - - org.apache.maven.plugins - maven-compiler-plugin - - - org.apache.maven.plugins - maven-source-plugin - - - attach-sources - verify - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - true - - - - - - diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java deleted file mode 100644 index a3a99cc0c..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java +++ /dev/null @@ -1,71 +0,0 @@ -package eu.dnetlib.maven.plugin.properties; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; - -/** - * Generates oozie properties which were not provided from commandline. 
- * @author mhorst - * - * @goal generate-properties - */ -public class GenerateOoziePropertiesMojo extends AbstractMojo { - - public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir"; - public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName"; - - private final String[] limiters = {"iis", "dnetlib", "eu", "dhp"}; - - @Override - public void execute() throws MojoExecutionException, MojoFailureException { - if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && - !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { - String generatedSandboxName = generateSandboxName(System.getProperties().getProperty( - PROPERTY_NAME_WF_SOURCE_DIR)); - if (generatedSandboxName!=null) { - System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME, - generatedSandboxName); - } else { - System.out.println("unable to generate sandbox name from path: " + - System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR)); - } - } - } - - /** - * Generates sandbox name from workflow source directory. - * @param wfSourceDir - * @return generated sandbox name - */ - private String generateSandboxName(String wfSourceDir) { -// utilize all dir names until finding one of the limiters - List sandboxNameParts = new ArrayList(); - String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); - ArrayUtils.reverse(tokens); - if (tokens.length>0) { - for (String token : tokens) { - for (String limiter : limiters) { - if (limiter.equals(token)) { - return sandboxNameParts.size()>0? - StringUtils.join(sandboxNameParts.toArray()):null; - } - } - if (sandboxNameParts.size()>0) { - sandboxNameParts.add(0, File.separator); - } - sandboxNameParts.add(0, token); - } - return StringUtils.join(sandboxNameParts.toArray()); - } else { - return null; - } - } - -} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java b/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java deleted file mode 100644 index 62f04761a..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java +++ /dev/null @@ -1,436 +0,0 @@ -/** - * - * Licensed under the Educational Community License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.opensource.org/licenses/ecl2.php - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package eu.dnetlib.maven.plugin.properties; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Date; -import java.util.List; -import java.util.Map.Entry; -import java.util.Properties; -import java.util.Set; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.plugin.MojoFailureException; -import org.apache.maven.project.MavenProject; -import org.springframework.core.io.DefaultResourceLoader; -import org.springframework.core.io.Resource; -import org.springframework.core.io.ResourceLoader; - -import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; - -/** - * Writes project properties for the keys listed in specified properties files. - * Based on: - * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html - - * @author mhorst - * @goal write-project-properties - */ -public class WritePredefinedProjectProperties extends AbstractMojo { - - private static final String CR = "\r"; - private static final String LF = "\n"; - private static final String TAB = "\t"; - protected static final String PROPERTY_PREFIX_ENV = "env."; - private static final String ENCODING_UTF8 = "utf8"; - - /** - * @parameter property="properties.includePropertyKeysFromFiles" - */ - private String[] includePropertyKeysFromFiles; - - /** - * @parameter default-value="${project}" - * @required - * @readonly - */ - protected MavenProject project; - - /** - * The file that properties will be written to - * - * @parameter property="properties.outputFile" - * default-value="${project.build.directory}/properties/project.properties"; - * @required - */ - protected File outputFile; - - /** - * If true, the plugin will silently ignore any non-existent properties files, and the build will continue - * - * @parameter property="properties.quiet" default-value="true" - */ - private boolean quiet; - - /** - * Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, - * tab=tab. Any other values are taken literally. - * - * @parameter default-value="cr,lf,tab" property="properties.escapeChars" - */ - private String escapeChars; - - /** - * If true, the plugin will include system properties when writing the properties file. System properties override - * both environment variables and project properties. - * - * @parameter default-value="false" property="properties.includeSystemProperties" - */ - private boolean includeSystemProperties; - - /** - * If true, the plugin will include environment variables when writing the properties file. Environment variables - * are prefixed with "env". Environment variables override project properties. - * - * @parameter default-value="false" property="properties.includeEnvironmentVariables" - */ - private boolean includeEnvironmentVariables; - - /** - * Comma separated set of properties to exclude when writing the properties file - * - * @parameter property="properties.exclude" - */ - private String exclude; - - /** - * Comma separated set of properties to write to the properties file. If provided, only the properties matching - * those supplied here will be written to the properties file. 
- * - * @parameter property="properties.include" - */ - private String include; - - /* (non-Javadoc) - * @see org.apache.maven.plugin.AbstractMojo#execute() - */ - @Override - @SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"}) - public void execute() throws MojoExecutionException, MojoFailureException { - Properties properties = new Properties(); - // Add project properties - properties.putAll(project.getProperties()); - if (includeEnvironmentVariables) { - // Add environment variables, overriding any existing properties with the same key - properties.putAll(getEnvironmentVariables()); - } - if (includeSystemProperties) { - // Add system properties, overriding any existing properties with the same key - properties.putAll(System.getProperties()); - } - - // Remove properties as appropriate - trim(properties, exclude, include); - - String comment = "# " + new Date() + "\n"; - List escapeTokens = getEscapeChars(escapeChars); - - getLog().info("Creating " + outputFile); - writeProperties(outputFile, comment, properties, escapeTokens); - } - - /** - * Provides environment variables. - * @return environment variables - */ - protected static Properties getEnvironmentVariables() { - Properties props = new Properties(); - for (Entry entry : System.getenv().entrySet()) { - props.setProperty(PROPERTY_PREFIX_ENV + entry.getKey(), entry.getValue()); - } - return props; - } - - /** - * Removes properties which should not be written. - * @param properties - * @param omitCSV - * @param includeCSV - * @throws MojoExecutionException - */ - protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException { - List omitKeys = getListFromCSV(omitCSV); - for (String key : omitKeys) { - properties.remove(key); - } - - List includeKeys = getListFromCSV(includeCSV); -// mh: including keys from predefined properties - if (includePropertyKeysFromFiles!=null && includePropertyKeysFromFiles.length>0) { - for (String currentIncludeLoc : includePropertyKeysFromFiles) { - if (validate(currentIncludeLoc)) { - Properties p = getProperties(currentIncludeLoc); - for (String key : p.stringPropertyNames()) { - includeKeys.add(key); - } - } - } - } - if (includeKeys!=null && !includeKeys.isEmpty()) { -// removing only when include keys provided - Set keys = properties.stringPropertyNames(); - for (String key : keys) { - if (!includeKeys.contains(key)) { - properties.remove(key); - } - } - } - } - - /** - * Checks whether file exists. - * @param location - * @return true when exists, false otherwise. - */ - protected boolean exists(String location) { - if (StringUtils.isBlank(location)) { - return false; - } - File file = new File(location); - if (file.exists()) { - return true; - } - ResourceLoader loader = new DefaultResourceLoader(); - Resource resource = loader.getResource(location); - return resource.exists(); - } - - /** - * Validates resource location. - * @param location - * @return true when valid, false otherwise - * @throws MojoExecutionException - */ - protected boolean validate(String location) throws MojoExecutionException { - boolean exists = exists(location); - if (exists) { - return true; - } - if (quiet) { - getLog().info("Ignoring non-existent properties file '" + location + "'"); - return false; - } else { - throw new MojoExecutionException("Non-existent properties file '" + location + "'"); - } - } - - /** - * Provides input stream. 
- * @param location - * @return input stream - * @throws IOException - */ - protected InputStream getInputStream(String location) throws IOException { - File file = new File(location); - if (file.exists()) { - return new FileInputStream(location); - } - ResourceLoader loader = new DefaultResourceLoader(); - Resource resource = loader.getResource(location); - return resource.getInputStream(); - } - - /** - * Creates properties for given location. - * @param location - * @return properties for given location - * @throws MojoExecutionException - */ - protected Properties getProperties(String location) throws MojoExecutionException { - InputStream in = null; - try { - Properties properties = new Properties(); - in = getInputStream(location); - if (location.toLowerCase().endsWith(".xml")) { - properties.loadFromXML(in); - } else { - properties.load(in); - } - return properties; - } catch (IOException e) { - throw new MojoExecutionException("Error reading properties file " + location, e); - } finally { - IOUtils.closeQuietly(in); - } - } - - /** - * Provides escape characters. - * @param escapeChars - * @return escape characters - */ - protected List getEscapeChars(String escapeChars) { - List tokens = getListFromCSV(escapeChars); - List realTokens = new ArrayList(); - for (String token : tokens) { - String realToken = getRealToken(token); - realTokens.add(realToken); - } - return realTokens; - } - - /** - * Provides real token. - * @param token - * @return real token - */ - protected String getRealToken(String token) { - if (token.equalsIgnoreCase("CR")) { - return CR; - } else if (token.equalsIgnoreCase("LF")) { - return LF; - } else if (token.equalsIgnoreCase("TAB")) { - return TAB; - } else { - return token; - } - } - - /** - * Returns content. - * @param comment - * @param properties - * @param escapeTokens - * @return content - */ - protected String getContent(String comment, Properties properties, List escapeTokens) { - List names = new ArrayList(properties.stringPropertyNames()); - Collections.sort(names); - StringBuilder sb = new StringBuilder(); - if (!StringUtils.isBlank(comment)) { - sb.append(comment); - } - for (String name : names) { - String value = properties.getProperty(name); - String escapedValue = escape(value, escapeTokens); - sb.append(name + "=" + escapedValue + "\n"); - } - return sb.toString(); - } - - /** - * Writes properties to given file. - * @param file - * @param comment - * @param properties - * @param escapeTokens - * @throws MojoExecutionException - */ - protected void writeProperties(File file, String comment, Properties properties, List escapeTokens) - throws MojoExecutionException { - try { - String content = getContent(comment, properties, escapeTokens); - FileUtils.writeStringToFile(file, content, ENCODING_UTF8); - } catch (IOException e) { - throw new MojoExecutionException("Error creating properties file", e); - } - } - - /** - * Escapes characters. - * @param s - * @param escapeChars - * @return - */ - protected String escape(String s, List escapeChars) { - String result = s; - for (String escapeChar : escapeChars) { - result = result.replace(escapeChar, getReplacementToken(escapeChar)); - } - return result; - } - - /** - * Provides replacement token. 
- * @param escapeChar - * @return replacement token - */ - protected String getReplacementToken(String escapeChar) { - if (escapeChar.equals(CR)) { - return "\\r"; - } else if (escapeChar.equals(LF)) { - return "\\n"; - } else if (escapeChar.equals(TAB)) { - return "\\t"; - } else { - return "\\" + escapeChar; - } - } - - /** - * Returns list from csv. - * @param csv - * @return list of values generated from CSV - */ - protected static final List getListFromCSV(String csv) { - if (StringUtils.isBlank(csv)) { - return new ArrayList(); - } - List list = new ArrayList(); - String[] tokens = StringUtils.split(csv, ","); - for (String token : tokens) { - list.add(token.trim()); - } - return list; - } - - public void setIncludeSystemProperties(boolean includeSystemProperties) { - this.includeSystemProperties = includeSystemProperties; - } - - public void setEscapeChars(String escapeChars) { - this.escapeChars = escapeChars; - } - - public void setIncludeEnvironmentVariables(boolean includeEnvironmentVariables) { - this.includeEnvironmentVariables = includeEnvironmentVariables; - } - - public void setExclude(String exclude) { - this.exclude = exclude; - } - - public void setInclude(String include) { - this.include = include; - } - - public void setQuiet(boolean quiet) { - this.quiet = quiet; - } - - /** - * Sets property files for which keys properties should be included. - * @param includePropertyKeysFromFiles - */ - public void setIncludePropertyKeysFromFiles( - String[] includePropertyKeysFromFiles) { - if (includePropertyKeysFromFiles!=null) { - this.includePropertyKeysFromFiles = Arrays.copyOf( - includePropertyKeysFromFiles, - includePropertyKeysFromFiles.length); - } - } - -} \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java deleted file mode 100644 index 8a763c1bd..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojoTest.java +++ /dev/null @@ -1,101 +0,0 @@ -package eu.dnetlib.maven.plugin.properties; - -import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME; -import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - -import org.junit.Before; -import org.junit.Test; - -/** - * @author mhorst - * - */ -public class GenerateOoziePropertiesMojoTest { - - private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); - - @Before - public void clearSystemProperties() { - System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); - System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); - } - - @Test - public void testExecuteEmpty() throws Exception { - // execute - mojo.execute(); - - // assert - assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); - } - - @Test - public void testExecuteSandboxNameAlreadySet() throws Exception { - // given - String workflowSourceDir = "eu/dnetlib/iis/wf/transformers"; - String sandboxName = "originalSandboxName"; - System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); - System.setProperty(PROPERTY_NAME_SANDBOX_NAME, sandboxName); - - // execute - mojo.execute(); - - // assert - assertEquals(sandboxName, 
System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); - } - - @Test - public void testExecuteEmptyWorkflowSourceDir() throws Exception { - // given - String workflowSourceDir = ""; - System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); - - // execute - mojo.execute(); - - // assert - assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); - } - - @Test - public void testExecuteNullSandboxNameGenerated() throws Exception { - // given - String workflowSourceDir = "eu/dnetlib/iis/"; - System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); - - // execute - mojo.execute(); - - // assert - assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); - } - - @Test - public void testExecute() throws Exception { - // given - String workflowSourceDir = "eu/dnetlib/iis/wf/transformers"; - System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); - - // execute - mojo.execute(); - - // assert - assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); - } - - @Test - public void testExecuteWithoutRoot() throws Exception { - // given - String workflowSourceDir = "wf/transformers"; - System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); - - // execute - mojo.execute(); - - // assert - assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); - } - -} diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java deleted file mode 100644 index 51d9575ff..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ /dev/null @@ -1,365 +0,0 @@ -package eu.dnetlib.maven.plugin.properties; - -import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.doReturn; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Properties; - -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.project.MavenProject; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.runners.MockitoJUnitRunner; - - -/** - * @author mhorst - * - */ -@RunWith(MockitoJUnitRunner.class) -public class WritePredefinedProjectPropertiesTest { - - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); - - @Mock - private MavenProject mavenProject; - - private WritePredefinedProjectProperties mojo; - - @Before - public void init() { - mojo = new WritePredefinedProjectProperties(); - mojo.outputFile = getPropertiesFileLocation(); - mojo.project = mavenProject; - doReturn(new Properties()).when(mavenProject).getProperties(); - } - - // ----------------------------------- TESTS --------------------------------------------- - - @Test - public void testExecuteEmpty() throws Exception { - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = 
getStoredProperties(); - assertEquals(0, storedProperties.size()); - } - - @Test - public void testExecuteWithProjectProperties() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - doReturn(projectProperties).when(mavenProject).getProperties(); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(1, storedProperties.size()); - assertTrue(storedProperties.containsKey(key)); - assertEquals(value, storedProperties.getProperty(key)); - } - - @Test(expected=MojoExecutionException.class) - public void testExecuteWithProjectPropertiesAndInvalidOutputFile() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - doReturn(projectProperties).when(mavenProject).getProperties(); - mojo.outputFile = testFolder.getRoot(); - - // execute - mojo.execute(); - } - - @Test - public void testExecuteWithProjectPropertiesExclusion() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String excludedKey = "excludedPropertyKey"; - String excludedValue = "excludedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(excludedKey, excludedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - mojo.setExclude(excludedKey); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(1, storedProperties.size()); - assertTrue(storedProperties.containsKey(key)); - assertEquals(value, storedProperties.getProperty(key)); - } - - @Test - public void testExecuteWithProjectPropertiesInclusion() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String includedKey = "includedPropertyKey"; - String includedValue = "includedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(includedKey, includedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - mojo.setInclude(includedKey); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(1, storedProperties.size()); - assertTrue(storedProperties.containsKey(includedKey)); - assertEquals(includedValue, storedProperties.getProperty(includedKey)); - } - - @Test - public void testExecuteIncludingPropertyKeysFromFile() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String includedKey = "includedPropertyKey"; - String includedValue = "includedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(includedKey, includedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - - File includedPropertiesFile = new File(testFolder.getRoot(), "included.properties"); - Properties includedProperties = new Properties(); - includedProperties.setProperty(includedKey, "irrelevantValue"); - 
includedProperties.store(new FileWriter(includedPropertiesFile), null); - - mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(1, storedProperties.size()); - assertTrue(storedProperties.containsKey(includedKey)); - assertEquals(includedValue, storedProperties.getProperty(includedKey)); - } - - @Test - public void testExecuteIncludingPropertyKeysFromClasspathResource() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String includedKey = "includedPropertyKey"; - String includedValue = "includedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(includedKey, includedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - - mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"}); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(1, storedProperties.size()); - assertTrue(storedProperties.containsKey(includedKey)); - assertEquals(includedValue, storedProperties.getProperty(includedKey)); - } - - @Test(expected=MojoExecutionException.class) - public void testExecuteIncludingPropertyKeysFromBlankLocation() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String includedKey = "includedPropertyKey"; - String includedValue = "includedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(includedKey, includedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - - mojo.setIncludePropertyKeysFromFiles(new String[] {""}); - - // execute - mojo.execute(); - } - - @Test - public void testExecuteIncludingPropertyKeysFromXmlFile() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String includedKey = "includedPropertyKey"; - String includedValue = "includedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(includedKey, includedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - - File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml"); - Properties includedProperties = new Properties(); - includedProperties.setProperty(includedKey, "irrelevantValue"); - includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null); - - mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(1, storedProperties.size()); - assertTrue(storedProperties.containsKey(includedKey)); - assertEquals(includedValue, storedProperties.getProperty(includedKey)); - } - - @Test(expected=MojoExecutionException.class) - public void testExecuteIncludingPropertyKeysFromInvalidXmlFile() throws Exception { - // given - String key = "projectPropertyKey"; - String value = "projectPropertyValue"; - String includedKey = "includedPropertyKey"; - 
String includedValue = "includedPropertyValue"; - Properties projectProperties = new Properties(); - projectProperties.setProperty(key, value); - projectProperties.setProperty(includedKey, includedValue); - doReturn(projectProperties).when(mavenProject).getProperties(); - - File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml"); - Properties includedProperties = new Properties(); - includedProperties.setProperty(includedKey, "irrelevantValue"); - includedProperties.store(new FileOutputStream(includedPropertiesFile), null); - - mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); - - // execute - mojo.execute(); - } - - @Test - public void testExecuteWithQuietModeOn() throws Exception { - // given - mojo.setQuiet(true); - mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"}); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertEquals(0, storedProperties.size()); - } - - @Test(expected=MojoExecutionException.class) - public void testExecuteIncludingPropertyKeysFromInvalidFile() throws Exception { - // given - mojo.setIncludePropertyKeysFromFiles(new String[] {"invalid location"}); - - // execute - mojo.execute(); - } - - @Test - public void testExecuteWithEnvironmentProperties() throws Exception { - // given - mojo.setIncludeEnvironmentVariables(true); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertTrue(storedProperties.size() > 0); - for (Object currentKey : storedProperties.keySet()) { - assertTrue(((String)currentKey).startsWith(PROPERTY_PREFIX_ENV)); - } - } - - @Test - public void testExecuteWithSystemProperties() throws Exception { - // given - String key = "systemPropertyKey"; - String value = "systemPropertyValue"; - System.setProperty(key, value); - mojo.setIncludeSystemProperties(true); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertTrue(storedProperties.size() > 0); - assertTrue(storedProperties.containsKey(key)); - assertEquals(value, storedProperties.getProperty(key)); - } - - @Test - public void testExecuteWithSystemPropertiesAndEscapeChars() throws Exception { - // given - String key = "systemPropertyKey "; - String value = "systemPropertyValue"; - System.setProperty(key, value); - mojo.setIncludeSystemProperties(true); - String escapeChars = "cr,lf,tab,|"; - mojo.setEscapeChars(escapeChars); - - // execute - mojo.execute(); - - // assert - assertTrue(mojo.outputFile.exists()); - Properties storedProperties = getStoredProperties(); - assertTrue(storedProperties.size() > 0); - assertFalse(storedProperties.containsKey(key)); - assertTrue(storedProperties.containsKey(key.trim())); - assertEquals(value, storedProperties.getProperty(key.trim())); - } - - // ----------------------------------- PRIVATE ------------------------------------------- - - private File getPropertiesFileLocation() { - return new File(testFolder.getRoot(), "test.properties"); - } - - private Properties getStoredProperties() throws FileNotFoundException, IOException { - Properties properties = new Properties(); - properties.load(new FileInputStream(getPropertiesFileLocation())); - return properties; - } -} diff --git 
a/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties b/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties deleted file mode 100644 index 3c79fe6cb..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/resources/eu/dnetlib/maven/plugin/properties/included.properties +++ /dev/null @@ -1 +0,0 @@ -includedPropertyKey=irrelevantValue \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml b/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml deleted file mode 100644 index 03188dc53..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/target/classes/META-INF/maven/plugin.xml +++ /dev/null @@ -1,281 +0,0 @@ - - - dhp-build-properties-maven-plugin - - eu.dnetlib.dhp - dhp-build-properties-maven-plugin - 1.0.0-SNAPSHOT - dhp-build-properties - false - true - - - generate-properties - Generates oozie properties which were not provided from commandline. - false - true - false - false - false - true - eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo - java - per-lookup - once-per-session - false - - - - write-project-properties - Writes project properties for the keys listed in specified properties files. -Based on: -http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html - false - true - false - false - false - true - eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties - java - per-lookup - once-per-session - false - - - properties.escapeChars - java.lang.String - false - true - Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, -tab=tab. Any other values are taken literally. - - - properties.exclude - java.lang.String - false - true - Comma separated set of properties to exclude when writing the properties file - - - properties.include - java.lang.String - false - true - Comma separated set of properties to write to the properties file. If provided, only the properties matching -those supplied here will be written to the properties file. - - - properties.includeEnvironmentVariables - boolean - false - true - If true, the plugin will include environment variables when writing the properties file. Environment variables -are prefixed with "env". Environment variables override project properties. - - - properties.includePropertyKeysFromFiles - java.lang.String[] - false - true - - - - properties.includeSystemProperties - boolean - false - true - If true, the plugin will include system properties when writing the properties file. System properties override -both environment variables and project properties. 
- - - properties.outputFile - java.io.File - true - true - The file that properties will be written to - - - project - org.apache.maven.project.MavenProject - true - false - - - - properties.quiet - boolean - false - true - If true, the plugin will silently ignore any non-existent properties files, and the build will continue - - - - - - - - - - - - - - - org.apache.maven - maven-plugin-api - jar - 2.0 - - - org.apache.maven - maven-project - jar - 2.0 - - - org.apache.maven - maven-profile - jar - 2.0 - - - org.apache.maven - maven-model - jar - 2.0 - - - org.apache.maven - maven-artifact-manager - jar - 2.0 - - - org.apache.maven - maven-repository-metadata - jar - 2.0 - - - org.apache.maven.wagon - wagon-provider-api - jar - 1.0-alpha-5 - - - org.codehaus.plexus - plexus-utils - jar - 1.0.4 - - - org.apache.maven - maven-artifact - jar - 2.0 - - - org.codehaus.plexus - plexus-container-default - jar - 1.0-alpha-8 - - - classworlds - classworlds - jar - 1.1-alpha-2 - - - org.kuali.maven.plugins - properties-maven-plugin - jar - 1.3.2 - - - org.springframework - spring-core - jar - 3.1.1.RELEASE - - - org.springframework - spring-asm - jar - 3.1.1.RELEASE - - - org.jasypt - jasypt - jar - 1.9.0 - - - org.kuali.maven.common - maven-kuali-common - jar - 1.2.8 - - - org.apache.ant - ant - jar - 1.8.2 - - - org.apache.ant - ant-launcher - jar - 1.8.2 - - - org.codehaus.plexus - plexus-interpolation - jar - 1.15 - - - commons-lang - commons-lang - jar - 2.6 - - - commons-io - commons-io - jar - 2.5 - - - org.slf4j - jcl-over-slf4j - jar - 1.6.4 - - - org.slf4j - slf4j-api - jar - 1.7.22 - - - org.slf4j - slf4j-log4j12 - jar - 1.7.22 - - - log4j - log4j - jar - 1.2.17 - - - javax.servlet - javax.servlet-api - jar - 3.1.0 - - - \ No newline at end of file diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class deleted file mode 100644 index 3eeb323f7..000000000 Binary files a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class and /dev/null differ diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class b/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class deleted file mode 100644 index e09929dea..000000000 Binary files a/dhp-build/dhp-build-properties-maven-plugin/target/classes/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class and /dev/null differ diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst deleted file mode 100644 index 5c141f830..000000000 --- a/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst +++ /dev/null @@ -1,2 +0,0 @@ -eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class -eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class diff --git a/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst 
b/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
deleted file mode 100644
index cac9348aa..000000000
--- a/dhp-build/dhp-build-properties-maven-plugin/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
+++ /dev/null
@@ -1,2 +0,0 @@
-/Users/claudio/workspace/dnet-hadoop/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java
-/Users/claudio/workspace/dnet-hadoop/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java
diff --git a/dhp-build/pom.xml b/dhp-build/pom.xml
index a930af4ea..bc95d979a 100644
--- a/dhp-build/pom.xml
+++ b/dhp-build/pom.xml
@@ -9,8 +9,7 @@
 dhp-build
 pom
-	dhp-build-assembly-resources
-	dhp-build-properties-maven-plugin
+
diff --git a/dhp-collector-worker/pom.xml b/dhp-collector-worker/pom.xml
index aa0e1c917..e187d86f1 100644
--- a/dhp-collector-worker/pom.xml
+++ b/dhp-collector-worker/pom.xml
@@ -3,13 +3,15 @@
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-	dhp
-	eu.dnetlib.dhp
-	1.0.0-SNAPSHOT
+	org.springframework.boot
+	spring-boot-starter-parent
+	2.1.3.RELEASE
 4.0.0
+	eu.dnetlib
 dhp-collector-worker
+	1.0.0
diff --git a/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java b/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java
index b10f11c5f..a0fb1a4d4 100644
--- a/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java
+++ b/dhp-collector-worker/src/main/java/eu/dnetlib/collector/worker/DnetCollectorWorkerApplication.java
@@ -1,5 +1,12 @@
 package eu.dnetlib.collector.worker;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import eu.dnetlib.collector.worker.model.ApiDescriptor;
+import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
+import eu.dnetlib.collector.worker.utils.CollectorPluginEnumerator;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
@@ -10,21 +17,21 @@
 import org.springframework.boot.CommandLineRunner;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
-import com.fasterxml.jackson.databind.ObjectMapper;
-
-import eu.dnetlib.collector.worker.model.ApiDescriptor;
-import eu.dnetlib.collector.worker.plugins.CollectorPlugin;
-import eu.dnetlib.collector.worker.utils.CollectorPluginEnumerator;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
 import java.io.IOException;
 import java.net.URI;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Stream;
+
+/**
+ * DnetCollectorWorkerApplication is the main class responsible for starting
+ * the Dnet collection into HDFS.
+ * This module is executed on the hadoop cluster and takes as input the
+ * parameters that tell it which collector plugin to use and in which HDFS
+ * path the collected data should be stored.
+ *
+ *
+ * @author Sandro La Bruzzo
+ */
 @SpringBootApplication
 public class DnetCollectorWorkerApplication implements CommandLineRunner {
@@ -42,7 +49,9 @@ public class DnetCollectorWorkerApplication implements CommandLineRunner {
 	}
 
 	/**
-	 *
+	 * This module expects two arguments:
+	 * the HDFS path where the sequence file will be stored, and
+	 * the JSON serialization of an {@link ApiDescriptor}.
 	 */
 	@Override
 	public void run(final String... args) throws Exception {
@@ -70,12 +79,9 @@ public class DnetCollectorWorkerApplication implements CommandLineRunner {
 	// Because of Maven
 	conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
 	conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
-	// Set HADOOP user
-	System.setProperty("HADOOP_USER_NAME", "sandro.labruzzo");
 	System.setProperty("hadoop.home.dir", "/");
 	//Get the filesystem - HDFS
 	FileSystem fs = FileSystem.get(URI.create(hdfsuri), conf);
-
 	Path hdfswritepath = new Path(hdfsPath);
 	log.info("Created path "+hdfswritepath.toString());
diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 42b8864fa..51db097b7 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -15,163 +15,10 @@
-
-		${project.groupId}
-		dhp-schemas
-		${project.version}
-
-
-		org.apache.oozie
-		oozie-core
-		provided
-
-
-
-		org.apache.hadoop
-		hadoop-mapreduce-client-core
-
-
-
-		org.apache.hadoop
-		hadoop-common
-
-
-
-		org.apache.spark
-		spark-core_2.10
-
-
-
-		org.apache.spark
-		spark-sql_2.10
-
-
-
-		org.apache.avro
-		avro
-
-
-
-		org.apache.avro
-		avro-mapred
-		hadoop2
-
-
-
-		org.apache.commons
-		commons-lang3
-
-
-
-
-		org.springframework
-		spring-beans
-
-
-
-		com.beust
-		jcommander
-
-
-
-		org.apache.pig
-		pig
-
-
-
-		com.linkedin.datafu
-		datafu
-
-
-
-		commons-beanutils
-		commons-beanutils
-
-
-
-		commons-io
-		commons-io
-
-
-
-		org.jdom
-		jdom
-
-
-
-
-		net.alchim31.maven
-		scala-maven-plugin
-
-
-
-		org.apache.avro
-		avro-maven-plugin
-
-
-		generate-test-sources
-
-		schema
-		idl-protocol
-
-
-		String
-
-
-
-
-
-
-		org.codehaus.mojo
-		build-helper-maven-plugin
-
-
-		add-test-sources
-		generate-test-sources
-
-		add-test-source
-
-
-
-		${project.build.directory}/generated-test-sources/avro/
-
-
-
-
-
-
-
-
-		org.apache.maven.plugins
-		maven-jar-plugin
-
-
-
-		test-jar
-
-
-
-
-
-
-		org.apache.maven.plugins
-		maven-surefire-plugin
-
-		eu.dnetlib.iis.common.IntegrationTest
-
-
-
-
-		org.apache.maven.plugins
-		maven-failsafe-plugin
-
-
-
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java
deleted file mode 100644
index 7fbcd8fef..000000000
--- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/FsShellPermissions.java
+++ /dev/null
@@ -1,106 +0,0 @@
-package eu.dnetlib.dhp.common;
-
-import java.lang.reflect.Method;
-import java.util.Arrays;
-import java.util.LinkedList;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FsShell;
-import org.springframework.beans.BeanUtils;
-import org.springframework.util.ClassUtils;
-import org.springframework.util.ReflectionUtils;
-
-/**
- * Extracted from:
https://github.com/spring-projects/spring-hadoop/blob/master/spring-hadoop-core/src/main/java/org/springframework/data/hadoop/fs/FsShellPermissions.java - * - * Utility class for accessing Hadoop FsShellPermissions (which is not public) - * without having to duplicate its code. - * @author Costin Leau - * - */ -public class FsShellPermissions { - - private static boolean IS_HADOOP_20X = ClassUtils.isPresent("org.apache.hadoop.fs.FsShellPermissions$Chmod", - FsShellPermissions.class.getClassLoader()); - - public enum Op { - CHOWN("-chown"), CHMOD("-chmod"), CHGRP("-chgrp"); - - private final String cmd; - - Op(String cmd) { - this.cmd = cmd; - } - - public String getCmd() { - return cmd; - } - } - - // TODO: move this into Spring Core (but add JDK 1.5 compatibility first) - @SafeVarargs - static T[] concatAll(T[] first, T[]... rest) { - // can add some sanity checks - int totalLength = first.length; - for (T[] array : rest) { - totalLength += array.length; - } - T[] result = Arrays.copyOf(first, totalLength); - int offset = first.length; - for (T[] array : rest) { - System.arraycopy(array, 0, result, offset, array.length); - offset += array.length; - } - return result; - } - - public static void changePermissions(FileSystem fs, Configuration config, - Op op, boolean recursive, String group, String uri) { - changePermissions(fs, config, op, recursive, group, new String[] {uri}); - } - - public static void changePermissions(FileSystem fs, Configuration config, - Op op, boolean recursive, String group, String... uris) { - String[] argvs; - if (recursive) { - argvs = new String[1]; - argvs[0] = "-R"; - } else { - argvs = new String[0]; - } - argvs = concatAll(argvs, new String[] { group }, uris); - - // Hadoop 1.0.x - if (!IS_HADOOP_20X) { - Class cls = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions", config.getClass().getClassLoader()); - Object[] args = new Object[] { fs, op.getCmd(), argvs, 0, new FsShell(config) }; - - Method m = ReflectionUtils.findMethod(cls, "changePermissions", FileSystem.class, String.class, String[].class, int.class, FsShell.class); - ReflectionUtils.makeAccessible(m); - ReflectionUtils.invokeMethod(m, null, args); - } - // Hadoop 2.x - else { - Class cmd = ClassUtils.resolveClassName("org.apache.hadoop.fs.shell.Command", config.getClass().getClassLoader()); - Class targetClz = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions$Chmod", config.getClass().getClassLoader()); - Configurable target = (Configurable) BeanUtils.instantiate(targetClz); - target.setConf(config); - // run(String...) 
swallows the exceptions - re-implement it here - // - LinkedList args = new LinkedList(Arrays.asList(argvs)); - try { - Method m = ReflectionUtils.findMethod(cmd, "processOptions", LinkedList.class); - ReflectionUtils.makeAccessible(m); - ReflectionUtils.invokeMethod(m, target, args); - m = ReflectionUtils.findMethod(cmd, "processRawArguments", LinkedList.class); - ReflectionUtils.makeAccessible(m); - ReflectionUtils.invokeMethod(m, target, args); - } catch (IllegalStateException ex){ - throw new RuntimeException("Cannot change permissions/ownership " + ex); - } - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java deleted file mode 100644 index 1ce7cd426..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java +++ /dev/null @@ -1,75 +0,0 @@ -package eu.dnetlib.dhp.common; - -import java.io.UnsupportedEncodingException; - -/** - * InfoSpaceConstants constants. - * - * @author mhorst - * - */ -public final class InfoSpaceConstants { - - public static final float CONFIDENCE_TO_TRUST_LEVEL_FACTOR = 0.9f; - - public static final String ENCODING_UTF8 = "utf-8"; - - public static final char ROW_PREFIX_SEPARATOR = '|'; - - public static final String ID_NAMESPACE_SEPARATOR = "::"; - public static final String CLASSIFICATION_HIERARCHY_SEPARATOR = ID_NAMESPACE_SEPARATOR; - public static final String INFERENCE_PROVENANCE_SEPARATOR = ID_NAMESPACE_SEPARATOR; - - public static final String ROW_PREFIX_RESULT = "50|"; - public static final String ROW_PREFIX_PROJECT = "40|"; - public static final String ROW_PREFIX_PERSON = "30|"; - public static final String ROW_PREFIX_ORGANIZATION = "20|"; - public static final String ROW_PREFIX_DATASOURCE = "10|"; - - public static final String QUALIFIER_BODY_STRING = "body"; - public static final byte[] QUALIFIER_BODY; - - public static final String SEMANTIC_CLASS_MAIN_TITLE = "main title"; - public static final String SEMANTIC_CLASS_PUBLICATION = "publication"; - public static final String SEMANTIC_CLASS_UNKNOWN = "UNKNOWN"; - - public static final String SEMANTIC_SCHEME_DNET_PERSON_ROLES = "dnet:personroles"; - public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT = "dnet:result_result_relations"; - public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_PROJECT = "dnet:result_project_relations"; - - public static final String SEMANTIC_SCHEME_DNET_TITLE = "dnet:dataCite_title"; - public static final String SEMANTIC_SCHEME_DNET_TITLE_TYPOLOGIES = "dnet:title_typologies"; - public static final String SEMANTIC_SCHEME_DNET_RESULT_TYPOLOGIES = "dnet:result_typologies"; - public static final String SEMANTIC_SCHEME_DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions"; - public static final String SEMANTIC_SCHEME_DNET_LANGUAGES = "dnet:languages"; - public static final String SEMANTIC_SCHEME_DNET_PID_TYPES = "dnet:pid_types"; - public static final String SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES = "dnet:subject_classification_typologies"; - - // resultResult citation and similarity related - public static final String SEMANTIC_SCHEME_DNET_DATASET_PUBLICATION_RELS = "dnet:dataset_publication_rels"; - - public static final String SEMANTIC_CLASS_TAXONOMIES_ARXIV = "arxiv"; - public static final String SEMANTIC_CLASS_TAXONOMIES_WOS = "wos"; - public static final String SEMANTIC_CLASS_TAXONOMIES_DDC = "ddc"; - public static final String SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC = "mesheuropmc"; - public static 
final String SEMANTIC_CLASS_TAXONOMIES_ACM = "acm"; - - public static final String EXTERNAL_ID_TYPE_INSTANCE_URL = "dnet:instance-url"; - public static final String EXTERNAL_ID_TYPE_UNKNOWN = "unknown"; - - // publication types class ids - public static final String SEMANTIC_CLASS_INSTANCE_TYPE_ARTICLE = "0001"; - public static final String SEMANTIC_CLASS_INSTANCE_TYPE_DATASET = "0021"; - - static { - try { - QUALIFIER_BODY = QUALIFIER_BODY_STRING.getBytes(ENCODING_UTF8); - - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - - private InfoSpaceConstants() { - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java deleted file mode 100644 index e71d69027..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/WorkflowRuntimeParameters.java +++ /dev/null @@ -1,74 +0,0 @@ -package eu.dnetlib.dhp.common; - -import java.util.Map; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; - -/** - * Utility class holding parameter names and method simplifying access to parameters from hadoop context. - * @author mhorst - * - */ -public final class WorkflowRuntimeParameters { - - public static final String OOZIE_ACTION_OUTPUT_FILENAME = "oozie.action.output.properties"; - - public static final char DEFAULT_CSV_DELIMITER = ','; - - public static final String UNDEFINED_NONEMPTY_VALUE = "$UNDEFINED$"; - - // default values - public static final String DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE = "60000"; - public static final String DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; - // parameter names - public static final String DNET_SERVICE_CLIENT_READ_TIMEOUT = "dnet.service.client.read.timeout"; - public static final String DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT = "dnet.service.client.connection.timeout"; - - // ----------------- CONSTRUCTORS ----------------------------- - - private WorkflowRuntimeParameters() {} - - /** - * Retrieves parameter from hadoop context configuration when set to value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. - */ - public static String getParamValue(String paramName, Configuration configuration) { - String paramValue = configuration.get(paramName); - if (StringUtils.isNotBlank(paramValue) && !UNDEFINED_NONEMPTY_VALUE.equals(paramValue)) { - return paramValue; - } else { - return null; - } - } - - /** - * Retrieves {@link Integer} parameter from hadoop context configuration when set to non-empty value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. - * Null is returned when parameter was not set. - * @throws {@link NumberFormatException} if parameter value does not contain a parsable integer - */ - public static Integer getIntegerParamValue(String paramName, Configuration configuration) throws NumberFormatException { - String paramValue = getParamValue(paramName, configuration); - return paramValue!=null?Integer.valueOf(paramValue):null; - } - - /** - * Retrieves parameter from hadoop context configuration when set to value different than {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}. - * If requested parameter was not set, fallback parameter is retrieved using the same logic. 
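 * For illustration, a hypothetical lookup sketch (the configured values below are made up):
 *
 *   Configuration conf = new Configuration();
 *   conf.set(DNET_SERVICE_CLIENT_READ_TIMEOUT, UNDEFINED_NONEMPTY_VALUE);  // treated as unset
 *   conf.set(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, "30000");
 *   // the primary name resolves to the undefined marker, so the fallback name is consulted:
 *   String timeout = getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT,
 *       DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, conf);                     // returns "30000"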
- */ - public static String getParamValue(String paramName, String fallbackParamName, Configuration configuration) { - String resultCandidate = getParamValue(paramName, configuration); - return resultCandidate!=null?resultCandidate:getParamValue(fallbackParamName, configuration); - } - - /** - * Provides parameter value. Returns default value when entry not found among parameters. - * - * @param paramName parameter name - * @param defaultValue parameter default value to be returned when entry not found among parameters - * @param parameters map of parameters - */ - public static String getParamValue(String paramName, String defaultValue, Map parameters) { - return parameters.containsKey(paramName)?parameters.get(paramName):defaultValue; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java deleted file mode 100644 index 5b37d31f1..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java +++ /dev/null @@ -1,111 +0,0 @@ -package eu.dnetlib.dhp.common.counter; - -import java.io.Serializable; -import java.util.Collection; -import java.util.Map; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; - -/** - * Class that groups several counters which are identified by name (String value). - * - * @author madryk - */ -public class NamedCounters implements Serializable { - - private static final long serialVersionUID = 1L; - - - private final Map counters; - - - //------------------------ CONSTRUCTORS -------------------------- - - /** - * Creates {@link NamedCounters} with empty initial counters. - */ - public NamedCounters() { - this.counters = Maps.newHashMap(); - } - - /** - * Creates {@link NamedCounters} with initial counters.
- * Starting value of initial counters is zero. - * - * @param initialCounterNames - names of initial counters - */ - public NamedCounters(String[] initialCounterNames) { - Preconditions.checkNotNull(initialCounterNames); - - this.counters = Maps.newHashMap(); - - for (String initialCounterName : initialCounterNames) { - this.counters.put(initialCounterName, 0L); - } - } - - /** - * Creates {@link NamedCounters} with initial counters.
- * Starting value of initial counters is zero. - * - * @param initialCounterNamesEnumClass - enum class providing names of initial counters - */ - public > NamedCounters(Class initialCounterNamesEnumClass) { - Preconditions.checkNotNull(initialCounterNamesEnumClass); - - this.counters = Maps.newHashMap(); - Enum[] enumConstants = initialCounterNamesEnumClass.getEnumConstants(); - - for (int i=0; i - * Internally uses {@link #increment(String, Long)} - */ - public void increment(String counterName) { - increment(counterName, 1L); - } - - /** - * Increments value of a counter with the name specified as parameter by the given value.
- * If current instance of {@link NamedCounters} does not contain counter - * with provided name, then before incrementing counter will be created with starting - * value equal to zero. - */ - public void increment(String counterName, Long incrementValue) { - - long oldValue = counters.getOrDefault(counterName, 0L); - counters.put(counterName, oldValue + incrementValue); - } - - /** - * Returns current value of a counter with the name specified as parameter. - * - * @throws IllegalArgumentException when {@link NamedCounters} does not contain counter - * with provided name - */ - public long currentValue(String counterName) { - - if (!counters.containsKey(counterName)) { - throw new IllegalArgumentException("Couldn't find counter with name: " + counterName); - } - - return counters.get(counterName); - } - - /** - * Returns names of currently tracked counters. - */ - public Collection counterNames() { - return counters.keySet(); - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java deleted file mode 100644 index 6686432dd..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java +++ /dev/null @@ -1,48 +0,0 @@ -package eu.dnetlib.dhp.common.counter; - -import org.apache.spark.AccumulableParam; - -import scala.Tuple2; - -/** - * Spark {@link AccumulableParam} for tracking multiple counter values using {@link NamedCounters}. - * - * @author madryk - */ -public class NamedCountersAccumulableParam implements AccumulableParam> { - - private static final long serialVersionUID = 1L; - - - //------------------------ LOGIC -------------------------- - - /** - * Increments {@link NamedCounters} counter with the name same as the first element of passed incrementValue tuple - * by value defined in the second element of incrementValue tuple. - */ - @Override - public NamedCounters addAccumulator(NamedCounters counters, Tuple2 incrementValue) { - counters.increment(incrementValue._1, incrementValue._2); - return counters; - } - - /** - * Merges two passed {@link NamedCounters}. - */ - @Override - public NamedCounters addInPlace(NamedCounters counters1, NamedCounters counters2) { - for (String counterName2 : counters2.counterNames()) { - counters1.increment(counterName2, counters2.currentValue(counterName2)); - } - return counters1; - } - - /** - * Returns passed initialCounters value without any modifications. - */ - @Override - public NamedCounters zero(NamedCounters initialCounters) { - return initialCounters; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java deleted file mode 100644 index bebb82b6e..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java +++ /dev/null @@ -1,52 +0,0 @@ -package eu.dnetlib.dhp.common.counter; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Properties; - -/** - * Writer of {@link NamedCounters} object into a properties file. - * - * @author madryk - */ -public class NamedCountersFileWriter { - - - //------------------------ LOGIC -------------------------- - - /** - * Writes {@link NamedCounters} as a properties file located under - * provided filePath. 
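 * For illustration, a hypothetical end-to-end sketch (the counter names and the target path are made up):
 *
 *   NamedCounters counters = new NamedCounters(new String[] {"processed", "skipped"});
 *   counters.increment("processed");     // 0 -> 1
 *   counters.increment("skipped", 3L);   // created on first use, 0 -> 3
 *   new NamedCountersFileWriter().writeCounters(counters, "/tmp/counters.properties");
 *   // the resulting properties file contains: processed=1, skipped=3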
- * - * @throws IOException if writing to properties file resulted in an error - */ - public void writeCounters(NamedCounters counters, String filePath) throws IOException { - - Properties counterProperties = buildPropertiesFromCounters(counters); - - File file = new File(filePath); - try (OutputStream os = new FileOutputStream(file)) { - - counterProperties.store(os, null); - - } - - } - - - //------------------------ PRIVATE -------------------------- - - private Properties buildPropertiesFromCounters(NamedCounters counters) { - - Properties properties = new Properties(); - - for (String counterName : counters.counterNames()) { - long count = counters.currentValue(counterName); - properties.put(counterName, String.valueOf(count)); - } - - return properties; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java deleted file mode 100644 index bcc6494b2..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/fault/FaultUtils.java +++ /dev/null @@ -1,67 +0,0 @@ -package eu.dnetlib.dhp.common.fault; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import eu.dnetlib.dhp.audit.schemas.Cause; -import eu.dnetlib.dhp.audit.schemas.Fault; - -/** - * {@link Fault} related utilities. - * @author mhorst - * - */ -public final class FaultUtils { - - // ---------------------- CONSTRUCTORS ------------------- - - private FaultUtils() {} - - // ---------------------- LOGIC -------------------------- - - /** - * Generates {@link Fault} instance based on {@link Throwable}. - * @param entityId entity identifier - * @param throwable - * @param auditSupplementaryData - * @return {@link Fault} instance generated for {@link Throwable} - */ - public static Fault exceptionToFault(CharSequence entityId, Throwable throwable, - Map auditSupplementaryData) { - Fault.Builder faultBuilder = Fault.newBuilder(); - faultBuilder.setInputObjectId(entityId); - faultBuilder.setTimestamp(System.currentTimeMillis()); - faultBuilder.setCode(throwable.getClass().getName()); - faultBuilder.setMessage(throwable.getMessage()); - StringWriter strWriter = new StringWriter(); - PrintWriter pw = new PrintWriter(strWriter); - throwable.printStackTrace(pw); - pw.close(); - faultBuilder.setStackTrace(strWriter.toString()); - if (throwable.getCause()!=null) { - faultBuilder.setCauses(appendThrowableToCauses( - throwable.getCause(), new ArrayList())); - } - if (auditSupplementaryData!=null && !auditSupplementaryData.isEmpty()) { - faultBuilder.setSupplementaryData(auditSupplementaryData); - } - return faultBuilder.build(); - } - - protected static List appendThrowableToCauses(Throwable e, List causes) { - Cause.Builder causeBuilder = Cause.newBuilder(); - causeBuilder.setCode(e.getClass().getName()); - causeBuilder.setMessage(e.getMessage()); - causes.add(causeBuilder.build()); - if (e.getCause()!=null) { - return appendThrowableToCauses( - e.getCause(),causes); - } else { - return causes; - } - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java deleted file mode 100644 index b6106044b..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParser.java +++ /dev/null @@ -1,98 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import org.apache.commons.cli.CommandLine; -import 
org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.lang3.StringUtils; - -/** - * - * @author Mateusz Kobos - * - */ -@SuppressWarnings("deprecation") -public final class CmdLineParser { - /** HACK: make the names of various types of parameters of the program - * more readable, e.g. "--Input_person=..." instead of "-Iperson=...", - * "--Output_merged=..." instead of "-Omerged=...". I wasn't able to - * get such notation so far using the Apache CLI. */ - public static final String constructorPrefix = "C"; - public static final String inputPrefix = "I"; - public static final String outputPrefix = "O"; - public static final String specialParametersPrefix = "S"; - /** HACK: This field should be removed since this list of special - * parameters is empty, thus not used anywhere.*/ - public static final String[] mandatorySpecialParameters = new String[]{}; - public static final String processParametersPrefix = "P"; - - // ------------------------- CONSTRUCTORS ------------------------------ - - private CmdLineParser() {} - - // ------------------------- LOGIC ------------------------------------- - - public static CommandLine parse(String[] args) { - Options options = new Options(); - @SuppressWarnings("static-access") - Option constructorParams = OptionBuilder.withArgName("STRING") - .hasArg() - .withDescription("Constructor parameter") - .withLongOpt("ConstructorParam") - .create(constructorPrefix); - options.addOption(constructorParams); - @SuppressWarnings("static-access") - Option inputs = OptionBuilder.withArgName("portName=URI") - .hasArgs(2) - .withValueSeparator() - .withDescription("Path binding for a given input port") - .withLongOpt("Input") - .create(inputPrefix); - options.addOption(inputs); - @SuppressWarnings("static-access") - Option outputs = OptionBuilder.withArgName("portName=URI") - .hasArgs(2) - .withValueSeparator() - .withDescription("Path binding for a given output port") - .create(outputPrefix); - options.addOption(outputs); - @SuppressWarnings("static-access") - Option specialParameter = OptionBuilder.withArgName("parameter_name=string") - .hasArgs(2) - .withValueSeparator() - .withDescription(String.format("Value of special parameter. 
" - + "These are the mandatory parameters={%s}", - StringUtils.join(mandatorySpecialParameters, ","))) - .create(specialParametersPrefix); - options.addOption(specialParameter); - @SuppressWarnings("static-access") - Option otherParameter = OptionBuilder.withArgName("parameter_name=string") - .hasArgs(2) - .withValueSeparator() - .withDescription( - String.format("Value of some other parameter.")) - .create(processParametersPrefix); - options.addOption(otherParameter); - - Option help = new Option("help", "print this message"); - options.addOption(help); - - CommandLineParser parser = new GnuParser(); - try { - CommandLine cmdLine = parser.parse(options, args); - if(cmdLine.hasOption("help")){ - HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("", options ); - System.exit(1); - } - return cmdLine; - } catch (ParseException e) { - throw new CmdLineParserException("Parsing command line arguments failed", e); - } - - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java deleted file mode 100644 index bbcad8d84..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserException.java +++ /dev/null @@ -1,21 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -/** - * Command line parsing exception - * @author Mateusz Kobos - * - */ -public class CmdLineParserException extends RuntimeException { - /** - * - */ - private static final long serialVersionUID = 9219928547611876284L; - - public CmdLineParserException(String message){ - super(message); - } - - public CmdLineParserException(String message, Throwable cause){ - super(message, cause); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java deleted file mode 100644 index 2c65d0892..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java +++ /dev/null @@ -1,45 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.lang.reflect.Constructor; - -import org.apache.commons.cli.CommandLine; - -/** - * Handles parsing the command line arguments provided by the Oozie - * to create a {@link Process} - * @author Mateusz Kobos - * - */ -public class CmdLineParserForProcessConstruction { - public Process run(CommandLine cmdLine){ - String[] args = cmdLine.getArgs(); - if(args.length != 1){ - throw new CmdLineParserException("The name of the class has "+ - "to be specified as the first agrument"); - } - String className = args[0]; - - String[] constructorParams = cmdLine.getOptionValues( - CmdLineParser.constructorPrefix); - if(constructorParams == null){ - constructorParams = new String[0]; - } - try { - Class processClass = Class.forName(className); - Constructor processConstructor = null; - if(constructorParams.length == 0){ - try{ - processConstructor = processClass.getConstructor(); - return (Process) processConstructor.newInstance(); - } catch(NoSuchMethodException ex){ - } - } - processConstructor = processClass.getConstructor(String[].class); - return (Process) processConstructor.newInstance( - (Object)constructorParams); - } catch (Exception e) { - throw new CmdLineParserException(String.format( - "Problem while creating class \"%s\"", className), e); - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java deleted file mode 100644 index 31db33103..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java +++ /dev/null @@ -1,100 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; -import java.util.Set; - -import org.apache.commons.cli.CommandLine; -import org.apache.hadoop.fs.Path; - -/** - * Handles parsing parameters passed to the {@link Process} - * @author Mateusz Kobos - * - */ -public class CmdLineParserForProcessRunParameters { - /** Parse the command line arguments. - * - * @param cmdLine command line arguments - * @param ports names of ports that ought to be extracted from command line - */ - public ProcessParameters run(CommandLine cmdLine, Ports ports) { - - Properties inputProperties = cmdLine.getOptionProperties( - CmdLineParser.inputPrefix); - assumePortNamesMatch(CmdLineParser.inputPrefix, inputProperties, - ports.getInput().keySet()); - Map inputBindings = getBindings( - inputProperties, ports.getInput().keySet()); - - Properties outputProperties = cmdLine.getOptionProperties( - CmdLineParser.outputPrefix); - assumePortNamesMatch(CmdLineParser.outputPrefix, outputProperties, - ports.getOutput().keySet()); - Map outputBindings = getBindings( - outputProperties, ports.getOutput().keySet()); - - PortBindings bindings = new PortBindings(inputBindings, outputBindings); - - Properties specialProperties = cmdLine.getOptionProperties( - CmdLineParser.specialParametersPrefix); - assumeContainAllMandatoryParameters( - specialProperties, CmdLineParser.mandatorySpecialParameters); - - Properties rawProperties = cmdLine.getOptionProperties( - CmdLineParser.processParametersPrefix); - Map processParameters = new HashMap(); - for(Entry entry: rawProperties.entrySet()){ - processParameters.put( - (String)entry.getKey(), (String)entry.getValue()); - } - - return new ProcessParameters(bindings, processParameters); - } - - private static void assumeContainAllMandatoryParameters( - Properties properties, String[] mandatoryParameters){ - for(String otherParameter: mandatoryParameters){ - if(!properties.containsKey(otherParameter)){ - throw new CmdLineParserException(String.format( - "Not all mandatory properties are set using the \"%s\" " - + "option are given, e.g. 
\"-%s\" parameter is missing", - CmdLineParser.specialParametersPrefix, otherParameter)); - } - } - } - - private static void assumePortNamesMatch(String cmdLineParamPrefix, - Properties cmdLineProperties, Set portNames) { - for (String name : portNames) { - if (!cmdLineProperties.containsKey(name)) { - throw new CmdLineParserException(String.format( - "The port with name \"%s\" is not specified in " - + "command line (command line option \"-%s\" is missing)", - name, cmdLineParamPrefix + name)); - } - } - for (Object cmdLineKeyObject : cmdLineProperties.keySet()) { - String name = (String) cmdLineKeyObject; - if (!portNames.contains(name)) { - throw new CmdLineParserException(String.format( - "A port name \"%s\" which is not specified is given " - + "in the command line " - + "(command line option \"%s\" is excess)", - name, cmdLineParamPrefix + name)); - } - } - } - - private static Map getBindings( - Properties cmdLineProperties, Set portNames) { - Map bindings = new HashMap(); - for (String name : portNames) { - Path path = new Path((String) cmdLineProperties.get(name)); - bindings.put(name, path); - } - return bindings; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java deleted file mode 100644 index 792002689..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/PortBindings.java +++ /dev/null @@ -1,43 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.util.Map; - -import org.apache.commons.lang.NotImplementedException; -import org.apache.hadoop.fs.Path; - -/** - * Port names (see {@link Ports}) bound to certain paths in the file system - * @author Mateusz Kobos - * - */ -public class PortBindings { - private final Map input; - private final Map output; - - public PortBindings(Map input, Map output) { - this.input = input; - this.output = output; - } - - public Map getInput() { - return input; - } - - public Map getOutput() { - return output; - } - - @Override - public boolean equals(Object o){ - if(!(o instanceof PortBindings)){ - return false; - } - PortBindings other = (PortBindings) o; - return input.equals(other.input) && output.equals(other.output); - } - - @Override - public int hashCode(){ - throw new NotImplementedException(); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java deleted file mode 100644 index 165f25061..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java +++ /dev/null @@ -1,27 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.util.Map; - -import eu.dnetlib.dhp.common.java.porttype.PortType; - -/** - * A class that groups information about input and output ports, i.e. - * their (name of the port -> type of the port) mappings. 
- * @author Mateusz Kobos - */ -public class Ports { - private final Map input; - private final Map output; - - public Ports(Map input, Map output){ - this.input = input; - this.output = output; - } - - public Map getInput() { - return input; - } - public Map getOutput() { - return output; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java deleted file mode 100644 index 77e7b617a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java +++ /dev/null @@ -1,43 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; - -import eu.dnetlib.dhp.common.java.porttype.PortType; - -/** Workflow node written in Java. - * - * The implementing class has to define a constructor with no parameters - * (possibly the default one) or a constructor with String[] as a single - * parameter. - * @author Mateusz Kobos - */ -public interface Process { - /** - * Run the process. - * - * The process ends with a success status if no exception is thrown, - * otherwise it ends with an error status. - * - * @param parameters parameters of the process. Each parameter - * corresponds to a single entry in the map, its name is the key, its - * value is the value. - * @throws Exception if thrown, it means that the process finished - * with an error status - */ - void run(PortBindings portBindings, Configuration conf, - Map parameters) throws Exception; - - /** - * @return map containing as the key: name of the port, as the value: type - * of the port - */ - Map getInputPorts(); - - /** - * @return map containing as the key: name of the port, as the value: type - * of the port - */ - Map getOutputPorts(); -} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java deleted file mode 100644 index 9d8d82779..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java +++ /dev/null @@ -1,20 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -/** - * Process exception - * @author Dominika Tkaczyk - * - */ -public class ProcessException extends RuntimeException { - - private static final long serialVersionUID = 2758953138374438377L; - - public ProcessException(String message){ - super(message); - } - - public ProcessException(String message, Throwable cause){ - super(message, cause); - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java deleted file mode 100644 index 33902dc20..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessParameters.java +++ /dev/null @@ -1,43 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.util.Map; - -import org.apache.commons.lang.NotImplementedException; - -/** - * Parameters of the Process retrieved from Oozie - * @author Mateusz Kobos - * - */ -public class ProcessParameters { - private final PortBindings portBindings; - private final Map parameters; - - public PortBindings getPortBindings() { - return portBindings; - } - - public Map getParameters(){ - return parameters; - } - - public ProcessParameters(PortBindings portBindings, - Map parameters) { - this.portBindings = portBindings; - this.parameters = parameters; - } - - @Override - public boolean equals(Object o){ - if(!(o instanceof ProcessParameters)){ 
- return false; - } - ProcessParameters other = (ProcessParameters) o; - return this.portBindings.equals(other.portBindings); - } - - @Override - public int hashCode(){ - throw new NotImplementedException(); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java deleted file mode 100644 index 084a521e6..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessUtils.java +++ /dev/null @@ -1,43 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; - -/** - * {@link Process} related utility class. - * @author mhorst - * - */ -public final class ProcessUtils { - - // ------------- CONSTRUCTORS ---------------- - - private ProcessUtils() {} - - // ------------- LOGIC ----------------------- - - /** - * Returns parameter value retrived from parameters or context. - * @param paramName - * @param hadoopConf - * @param parameters - * @return parameter value - */ - public static String getParameterValue(String paramName, - Configuration hadoopConf, - Map parameters) { - if (parameters!=null && !parameters.isEmpty()) { - String result = null; - result = parameters.get(paramName); - if (result!=null) { - return result; - } - } - if (hadoopConf!=null) { - return hadoopConf.get(paramName); - } else { - return null; - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java deleted file mode 100644 index d60eb0cd9..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessWrapper.java +++ /dev/null @@ -1,88 +0,0 @@ -package eu.dnetlib.dhp.common.java; - -import java.io.IOException; -import java.util.Map; - -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericContainer; -import org.apache.commons.cli.CommandLine; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; - -import eu.dnetlib.dhp.common.java.io.DataStore; -import eu.dnetlib.dhp.common.java.io.FileSystemPath; -import eu.dnetlib.dhp.common.java.porttype.AvroPortType; -import eu.dnetlib.dhp.common.java.porttype.PortType; - -/** - * Creates {@link Process} object through reflection by parsing - * the command-line arguments - * @author Mateusz Kobos - * - */ -public class ProcessWrapper { - - public Configuration getConfiguration() throws Exception{ - return new Configuration(); - } - - public static void main(String[] args) throws Exception { - ProcessWrapper wrapper = new ProcessWrapper(); - wrapper.run(args); - } - - public void run(String[] args) throws Exception{ - CommandLine cmdLine = CmdLineParser.parse(args); - - CmdLineParserForProcessConstruction constructionParser = - new CmdLineParserForProcessConstruction(); - Process process = constructionParser.run(cmdLine); - Ports ports = - new Ports(process.getInputPorts(), process.getOutputPorts()); - CmdLineParserForProcessRunParameters runParametersParser = - new CmdLineParserForProcessRunParameters(); - ProcessParameters params = runParametersParser.run(cmdLine, ports); - Configuration conf = getConfiguration(); - process.run(params.getPortBindings(), conf, params.getParameters()); - createOutputsIfDontExist( - process.getOutputPorts(), params.getPortBindings().getOutput(), - conf); - } - - 
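// For illustration, a hypothetical launch command for this wrapper (the class name, port
// names and paths are made up; the option prefixes are the ones defined in CmdLineParser,
// and the -I/-O bindings must match the ports declared by the Process implementation):
//
//   java eu.dnetlib.dhp.common.java.ProcessWrapper \
//       org.example.MyProcess \
//       -Iperson=hdfs:///data/input/person \
//       -Omerged=hdfs:///data/output/merged \
//       -Pdnet.service.client.read.timeout=60000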
private static void createOutputsIfDontExist( - Map outputPortsSpecification, - Map outputPortBindings, Configuration conf) throws IOException{ - FileSystem fs = FileSystem.get(conf); - for(Map.Entry entry: outputPortBindings.entrySet()){ - Path path = entry.getValue(); - if(!fs.exists(path) || isEmptyDirectory(fs, path)){ - PortType rawType = outputPortsSpecification.get(entry.getKey()); - if(!(rawType instanceof AvroPortType)){ - throw new RuntimeException("The port \""+entry.getKey()+ - "\" is not of Avro type and only Avro types are "+ - "supported"); - } - AvroPortType type = (AvroPortType) rawType; - FileSystemPath fsPath = new FileSystemPath(fs, path); - DataFileWriter writer = - DataStore.create(fsPath, type.getSchema()); - writer.close(); - } - } - } - - private static boolean isEmptyDirectory(FileSystem fs, Path path) throws IOException{ - if(!fs.isDirectory(path)){ - return false; - } - RemoteIterator files = fs.listFiles(path, false); - /** There's at least one file, so the directory is not empty */ - if(files.hasNext()){ - return false; - } - return true; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java deleted file mode 100644 index 4d3618854..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/AvroDataStoreReader.java +++ /dev/null @@ -1,156 +0,0 @@ -package eu.dnetlib.dhp.common.java.io; - -import java.io.IOException; -import java.util.NoSuchElementException; -import java.util.regex.Pattern; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileReader; -import org.apache.avro.specific.SpecificDatumReader; -import org.apache.hadoop.fs.AvroFSInput; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.RemoteIterator; - - - -/** - * An abstraction over data store format which allows - * iterating over records stored in the data store. - * It handles the standard case of a data store that is a directory containing - * many Avro files (but it can also read records from a single file). - * - * @author mhorst - * @author Mateusz Kobos - */ -class AvroDataStoreReader implements CloseableIterator { - - private DataFileReader currentReader; - private RemoteIterator fileIterator; - private final FileSystemPath path; - private final Schema readerSchema; - - /** - * Ignore file starting with underscore. Such files are also ignored by - * default by map-reduce jobs. - */ - private final Pattern whitelistPattern = Pattern.compile("^(?!_).*"); - - /** - * Here the schema used for reading the data store is set to be the same - * as the one that was used to write it. 
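 * For illustration, a hypothetical read loop (the path and the Person record type are made up;
 * note the class is package-private, so callers outside the package normally go through
 * DataStore.getReader):
 *
 *   FileSystem fs = FileSystem.get(new Configuration());
 *   FileSystemPath store = new FileSystemPath(fs, new Path("/data/person"));
 *   try (CloseableIterator<Person> it = new AvroDataStoreReader<Person>(store)) {
 *       while (it.hasNext()) {
 *           Person record = it.next();
 *           // ... process the record ...
 *       }
 *   }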
- */ - public AvroDataStoreReader(final FileSystemPath path) - throws IOException { - this(path, null); - } - - /** - * @param path path to the data store to be read - * @param readerSchema the schema onto which the read data store will - * be projected - */ - public AvroDataStoreReader(final FileSystemPath path, Schema readerSchema) - throws IOException { - this.path = path; - this.readerSchema = readerSchema; - fileIterator = path.getFileSystem().listFiles(path.getPath(), false); - currentReader = getNextNonemptyReader(); - } - - private DataFileReader getNextNonemptyReader() throws IOException { - while (fileIterator != null && fileIterator.hasNext()) { - LocatedFileStatus currentFileStatus = fileIterator.next(); - if (isValidFile(currentFileStatus)) { - FileSystemPath currPath = new FileSystemPath( - path.getFileSystem(), currentFileStatus.getPath()); - DataFileReader reader = - getSingleFileReader(currPath, readerSchema); - /** Check if the file contains at least one record */ - if(reader.hasNext()){ - return reader; - } else { - reader.close(); - } - } - } - /** fallback */ - return null; - } - - /** - * Get a reader for the specified Avro file. A utility function. - * @param path path to the existing file - * @param readerSchema optional reader schema. If you want to use the - * default option of using writer schema as the reader schema, pass the - * {@code null} value. - * @throws IOException - */ - private static DataFileReader getSingleFileReader( - FileSystemPath path, Schema readerSchema) throws IOException{ - try{ - SpecificDatumReader datumReader = new SpecificDatumReader(); - if(readerSchema != null){ - datumReader.setExpected(readerSchema); - } - long len = path.getFileSystem().getFileStatus(path.getPath()).getLen(); - FSDataInputStream inputStream = path.getFileSystem().open(path.getPath()); - return new DataFileReader( - new AvroFSInput(inputStream, len), datumReader); - } catch (IOException ex){ - throw new IOException("Problem with file \""+ - path.getPath().toString()+"\": "+ex.getMessage(), ex); - } - } - - /** - * Checks whether file is valid - * - * @param fileStatus - * @return true when valid, false otherwise - */ - private boolean isValidFile(LocatedFileStatus fileStatus) { - if (fileStatus.isFile()) { - return whitelistPattern.matcher( - fileStatus.getPath().getName()).matches(); - } - /** fallback */ - return false; - } - - @Override - public boolean hasNext() { - return currentReader != null; - } - - @Override - public T next(){ - if(currentReader == null){ - throw new NoSuchElementException(); - } - T obj = currentReader.next(); - if(!currentReader.hasNext()){ - try{ - currentReader.close(); - currentReader = getNextNonemptyReader(); - } catch(IOException ex){ - throw new RuntimeException(ex); - } - } - return obj; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - @Override - public void close() throws IOException { - if(currentReader != null){ - currentReader.close(); - currentReader = null; - } - fileIterator = null; - } -} \ No newline at end of file diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java deleted file mode 100644 index 1fc77832e..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CloseableIterator.java +++ /dev/null @@ -1,25 +0,0 @@ -package eu.dnetlib.dhp.common.java.io; - -import java.io.Closeable; -import java.util.Iterator; - -/** - * An iterator for I/O 
operations that can be {@code close}d explicitly to - * release the resources it holds. - * - * You should call {@code close} only when interrupting the iteration in the - * middle since in such situation there is no way for the iterator to know if - * you're going to continue the iteration and it should still hold the resources - * or not. There's no need to call {@code close} when iterating over all - * elements since in such situation it is called automatically after the - * end of iteration. - * - * @author mhorst - * - * @param - */ -public interface CloseableIterator extends Iterator, Closeable { - - -} - diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java deleted file mode 100644 index a08f975e1..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/CountingIterator.java +++ /dev/null @@ -1,19 +0,0 @@ -package eu.dnetlib.dhp.common.java.io; - -import java.util.Iterator; - -/** - * Counting iterator providing total number of results. - * @author mhorst - * - * @param - */ -public interface CountingIterator extends Iterator { - - /** - * Provides total number of results to be iterating on. - * @return total number of results to be iterating on - */ - int getCount(); - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java deleted file mode 100644 index aa66b6f51..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/io/DataStore.java +++ /dev/null @@ -1,172 +0,0 @@ -package eu.dnetlib.dhp.common.java.io; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericContainer; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.specific.SpecificDatumWriter; - - -/** - * Utility for accessing to Avro-based data stores stored in file system - * @author Mateusz Kobos - * - */ -public final class DataStore { - - private final static String singleDataStoreFileName = "content.avro"; - - private static final int FILE_NO_PADDING_LENGTH = 7; - - private DataStore(){} - - /** - * Create a new data store directory with single file and return writer that allows - * adding new records - * @param path path to a directory to be created - * @param schema schema of the records to be stored in the file - * @return - * @throws IOException - */ - public static DataFileWriter create( - FileSystemPath path, Schema schema) throws IOException{ - return create(path, schema, singleDataStoreFileName); - } - - - /** - * Create a new data store directory and return writer that allows - * adding new records - * @param path path to a directory to be created - * @param schema schema of the records to be stored in the file - * @param dataStoreFileName datastore file name - * @return - * @throws IOException - */ - public static DataFileWriter create( - FileSystemPath path, Schema schema, String dataStoreFileName) throws IOException{ - path.getFileSystem().mkdirs(path.getPath()); - FileSystemPath outFile = new FileSystemPath( - path, dataStoreFileName); - return DataStore.createSingleFile(outFile, schema); - } - - /** - * Get reader for reading records from given data store - * - * Here the schema used for reading the data store is set to be the same - * as the one that was used to write it. 
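 * For illustration, a hypothetical write/read round trip (the path and the Person records are made up):
 *
 *   FileSystem fs = FileSystem.get(new Configuration());
 *   FileSystemPath store = new FileSystemPath(fs, new Path("/data/person"));
 *   DataStore.create(Arrays.asList(personA, personB), store);  // schema taken from the first element
 *   try (CloseableIterator<Person> it = DataStore.getReader(store)) {
 *       while (it.hasNext()) {
 *           Person record = it.next();
 *       }
 *   }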
- * - * @see getReader(FileSystemPath path, Schema readerSchema) for details. - * - */ - public static CloseableIterator getReader(FileSystemPath path) - throws IOException{ - return getReader(path, null); - } - - /** - * Get reader for reading records from given data store - * @param path path to a directory corresponding to data store - * @param readerSchema the schema onto which the read data store will - * be projected - */ - public static CloseableIterator getReader( - FileSystemPath path, Schema readerSchema) throws IOException{ - return new AvroDataStoreReader(path, readerSchema); - } - - /** - * Read data store entries and insert them into a list. A utility function. - * - * Here the schema used for reading the data store is set to be the same - * as the one that was used to write it. - */ - public static List read(FileSystemPath path) - throws IOException{ - return read(path, null); - } - - /** - * Read data store entries and insert them into a list. A utility function. - * - * @param readerSchema the schema onto which the read data store will - * be projected - */ - public static List read(FileSystemPath path, Schema readerSchema) - throws IOException{ - CloseableIterator iterator = getReader(path, readerSchema); - List elems = new ArrayList(); - while(iterator.hasNext()){ - elems.add(iterator.next()); - } - return elems; - } - - /** - * Create a data store from a list of entries. A utility function. - * The schema is implicitly - * taken from the first element from the {@code elements} list. - * @param elements list of elements to write. At least one element has - * to be present, because it is used to retrieve schema of the - * structures passed in the list. - */ - public static void create( - List elements, FileSystemPath path) throws IOException{ - if(elements.isEmpty()){ - throw new IllegalArgumentException( - "The list of elements has to be non-empty"); - } - Schema schema = elements.get(0).getSchema(); - create(elements, path, schema); - } - - /** - * Create a data store from a list of entries with schema given explicitly. - * A utility function. - */ - public static void create( - List elements, FileSystemPath path, Schema schema) - throws IOException{ - DataFileWriter writer = create(path, schema); - try{ - for(T i: elements){ - writer.append(i); - } - } finally { - if(writer != null){ - writer.close(); - } - } - } - - /** - * Create a single Avro file. This method shouldn't be normally used to - * create data stores since it creates only a single Avro file, - * while a data store consists of a directory containing one or more files. - */ - public static DataFileWriter createSingleFile( - FileSystemPath path, Schema schema) throws IOException{ - DatumWriter datumWriter = new SpecificDatumWriter(); - DataFileWriter writer = new DataFileWriter(datumWriter); - writer.create(schema, path.getFileSystem().create(path.getPath())); - return writer; - } - - /** - * Generates filename for given file number. - * @param fileNo file sequence number - */ - public static String generateFileName(int fileNo) { - StringBuffer strBuff = new StringBuffer(String.valueOf(fileNo)); - while(strBuff.length() { - - private SequenceFile.Reader sequenceReader; - - private final RemoteIterator fileIt; - - private final FileSystem fs; - - /** - * Ignore file starting with underscore. Such files are also ignored by - * default by map-reduce jobs. 
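 * For illustration (the file names below are hypothetical):
 *
 *   WHITELIST_REGEXP.matcher("part-m-00000").matches()  // true  -> file is read
 *   WHITELIST_REGEXP.matcher("_SUCCESS").matches()      // false -> file is skipped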
- */ - private final static Pattern WHITELIST_REGEXP = Pattern.compile("^[^_].*"); - - private Text toBeReturned; - - //------------------------ CONSTRUCTORS -------------------------- - - /** - * Default constructor. - * - * @param path HDFS path along with associated FileSystem - * @throws IOException - */ - public SequenceFileTextValueReader(final FileSystemPath path) throws IOException { - this.fs = path.getFileSystem(); - if (fs.isDirectory(path.getPath())) { - fileIt = fs.listFiles(path.getPath(), false); - sequenceReader = getNextSequenceReader(); - } else { - fileIt = null; - sequenceReader = new Reader(fs.getConf(), SequenceFile.Reader.file(path.getPath())); - } - } - - //------------------------ LOGIC --------------------------------- - - /* - * (non-Javadoc) - * - * @see java.util.Iterator#hasNext() - */ - @Override - public boolean hasNext() { - // check and provide next when already returned - if (toBeReturned == null) { - toBeReturned = getNext(); - } - return toBeReturned != null; - } - - /* - * (non-Javadoc) - * - * @see java.util.Iterator#next() - */ - @Override - public Text next() { - if (toBeReturned != null) { - // element fetched while executing hasNext() - Text result = toBeReturned; - toBeReturned = null; - return result; - } else { - Text resultCandidate = getNext(); - if (resultCandidate!=null) { - return resultCandidate; - } else { - throw new NoSuchElementException(); - } - } - } - - /* - * (non-Javadoc) - * - * @see eu.dnetlib.dhp.exp.iterator.ClosableIterator#close() - */ - @Override - public void close() throws IOException { - if (sequenceReader != null) { - sequenceReader.close(); - } - } - - //------------------------ PRIVATE ------------------------------- - - private final Reader getNextSequenceReader() throws IOException { - while (fileIt != null && fileIt.hasNext()) { - LocatedFileStatus currentFileStatus = fileIt.next(); - if (isValidFile(currentFileStatus)) { - return new Reader(this.fs.getConf(), SequenceFile.Reader.file(currentFileStatus.getPath())); - } - } - // fallback - return null; - } - - /** - * Checks whether file is valid candidate. 
- * - * @param fileStatus - * file status holding file name - * @return true when valid, false otherwise - */ - private final boolean isValidFile(LocatedFileStatus fileStatus) { - if (fileStatus.isFile()) { - return WHITELIST_REGEXP.matcher(fileStatus.getPath().getName()).matches(); - } else { - return false; - } - } - - /** - * @return next data package - */ - private Text getNext() { - try { - if (sequenceReader == null) { - return null; - } - Writable key = (Writable) ReflectionUtils.newInstance(sequenceReader.getKeyClass(), fs.getConf()); - Writable value = (Writable) ReflectionUtils.newInstance(sequenceReader.getValueClass(), fs.getConf()); - if (sequenceReader.next(key, value)) { - return (Text) value; - } else { - sequenceReader.close(); - sequenceReader = getNextSequenceReader(); - if (sequenceReader != null) { - return getNext(); - } - } - // fallback - return null; - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java deleted file mode 100644 index 00a071ac9..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/ClassPathResourceToHdfsCopier.java +++ /dev/null @@ -1,54 +0,0 @@ -package eu.dnetlib.dhp.common.java.jsonworkflownodes; - -import java.io.InputStream; -import java.io.OutputStream; -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IOUtils; - -import com.google.common.base.Preconditions; - -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.Process; -import eu.dnetlib.dhp.common.java.porttype.PortType; - -/** - * Utility class responsible for copying resources available on classpath to specified HDFS location. 
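 * For illustration, a hypothetical invocation (the resource name and target location are made up):
 *
 *   Map<String, String> params = new HashMap<String, String>();
 *   params.put("inputClasspathResource", "schemas/person.avsc");
 *   params.put("outputHdfsFileLocation", "/user/app/schemas/person.avsc");
 *   new ClassPathResourceToHdfsCopier().run(
 *       new PortBindings(new HashMap<String, Path>(), new HashMap<String, Path>()),
 *       new Configuration(), params);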
- * @author mhorst - * - */ -public class ClassPathResourceToHdfsCopier implements Process { - - private static final String PARAM_INPUT_CLASSPATH_RESOURCE = "inputClasspathResource"; - - private static final String PARAM_OUTPUT_HDFS_FILE_LOCATION = "outputHdfsFileLocation"; - - @Override - public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { - Preconditions.checkNotNull(parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE), PARAM_INPUT_CLASSPATH_RESOURCE + " parameter was not specified!"); - Preconditions.checkNotNull(parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION), PARAM_OUTPUT_HDFS_FILE_LOCATION + " parameter was not specified!"); - - FileSystem fs = FileSystem.get(conf); - - try (InputStream in = Thread.currentThread().getContextClassLoader() - .getResourceAsStream(parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE)); - OutputStream os = fs.create(new Path(parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION)))) { - IOUtils.copyBytes(in, os, 4096, false); - } - } - - @Override - public Map getInputPorts() { - return new HashMap(); - } - - @Override - public Map getOutputPorts() { - return new HashMap(); - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java deleted file mode 100644 index 9de33809a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java +++ /dev/null @@ -1,66 +0,0 @@ -package eu.dnetlib.dhp.common.java.jsonworkflownodes; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.avro.Schema; - -import eu.dnetlib.dhp.common.java.jsonworkflownodes.StringPortSpecificationExtractor.PortSpecification; -import eu.dnetlib.dhp.common.java.porttype.AvroPortType; -import eu.dnetlib.dhp.common.java.porttype.PortType; -import eu.dnetlib.dhp.common.utils.AvroUtils; - -/** - * @author Mateusz Kobos - */ -public class PortSpecifications { - private static final String[] propertyRegexps = - new String[]{"[\\w\\.]+", "[\\w\\./_\\-]+"}; - private final Map specs; - - public static class SpecificationValues { - - private final Schema schema; - - private final String jsonFilePath; - - public SpecificationValues(Schema schema, String jsonFilePath) { - this.schema = schema; - this.jsonFilePath = jsonFilePath; - } - - public Schema getSchema() { - return schema; - } - - public String getJsonFilePath() { - return jsonFilePath; - } - - } - - public PortSpecifications(String[] portSpecifications){ - StringPortSpecificationExtractor portSpecExtractor = - new StringPortSpecificationExtractor(propertyRegexps); - specs = new HashMap(); - for(int i = 0; i < portSpecifications.length; i++){ - PortSpecification portSpec = portSpecExtractor.getSpecification(portSpecifications[i]); - Schema schema = AvroUtils.toSchema(portSpec.getProperties()[0]); - String jsonPath = portSpec.getProperties()[1]; - specs.put(portSpec.getName(), new SpecificationValues(schema, jsonPath)); - } - } - - public SpecificationValues get(String portName){ - return specs.get(portName); - } - - public Map getPortTypes(){ - Map ports = new HashMap(); - for(Map.Entry e: specs.entrySet()){ - Schema schema = e.getValue().schema; - ports.put(e.getKey(), new AvroPortType(schema)); - } - return ports; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java deleted file mode 100644 index 0b10b6805..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/StringPortSpecificationExtractor.java +++ /dev/null @@ -1,89 +0,0 @@ -package eu.dnetlib.dhp.common.java.jsonworkflownodes; - -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Extracts information about port name and its properties from a string - * of a form "{port_name, property_1, property_2, ...}" - * @author Mateusz Kobos - */ -public class StringPortSpecificationExtractor { - private final String[] propertiesRegexp; - private final String portSpecificationRegexp; - private final Pattern pattern; - - public static class PortSpecification { - - private final String name; - - private final String[] properties; - - public PortSpecification(String name, String[] properties) { - this.name = name; - this.properties = properties; - } - - public String getName() { - return name; - } - - public String[] getProperties() { - return properties; - } - } - - /** - * @param propertiesRegexp regular expressions specifying pattern for - * each of the properties associated with a port. An example of a single - * specification: {@code "[\\w\\.]+"}. - */ - public StringPortSpecificationExtractor(String[] propertiesRegexp){ - this.propertiesRegexp = propertiesRegexp; - this.portSpecificationRegexp = createRegexpString("[\\w\\._]+", propertiesRegexp); - this.pattern = Pattern.compile(this.portSpecificationRegexp); - } - - private static String createRegexpString(String portNameRegexp, String[] propertiesRegexp){ - StringBuilder regexp = new StringBuilder(); - regexp.append("s*\\{\\s*"); - regexp.append("("+portNameRegexp+")"); - for(String propertyRegexp: propertiesRegexp){ - regexp.append(",\\s*("+propertyRegexp+")"); - } - regexp.append("\\s*\\}\\s*"); - return regexp.toString(); - } - - private int getPropertiesCount(){ - return propertiesRegexp.length; - } - - public PortSpecification getSpecification(String text){ - Matcher m = pattern.matcher(text); - if(!m.matches()){ - throw new RuntimeException(String.format("Specification of " + - "the port (\"%s\") does not match regexp \"%s\"", - text, portSpecificationRegexp)); - } - final int expectedGroupsCount = getPropertiesCount()+1; - if(m.groupCount() != expectedGroupsCount){ - StringBuilder groups = new StringBuilder(); - for(int i = 0; i < m.groupCount(); i++){ - groups.append("\""+m.group(i)+"\""); - if(i != m.groupCount()-1) { - groups.append(", "); - } - } - throw new RuntimeException(String.format( - "Invalid output port specification \"%s\": got %d groups "+ - "instead of %d (namely: %s)", text, m.groupCount(), - expectedGroupsCount, groups.toString())); - } - String[] properties = new String[getPropertiesCount()]; - for(int i = 0; i < getPropertiesCount(); i++){ - properties[i] = m.group(i+2); - } - return new PortSpecification(m.group(1), properties); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java deleted file mode 100644 index 57bda6ed7..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AnyPortType.java +++ /dev/null @@ -1,20 +0,0 @@ -package eu.dnetlib.dhp.common.java.porttype; - -/** - * A port type that accepts any type of data - * @author Mateusz Kobos - * - */ -public class AnyPortType implements PortType { - - @Override - 
public String getName() { - return "Any"; - } - - @Override - public boolean accepts(PortType other) { - return true; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java deleted file mode 100644 index 47a57164e..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/AvroPortType.java +++ /dev/null @@ -1,65 +0,0 @@ -package eu.dnetlib.dhp.common.java.porttype; - -import org.apache.avro.Schema; -import org.apache.commons.lang.NotImplementedException; - -/** - * This port type accepts data stores in a format of Avro - * Object Container Files, i.e. Avro data files. - * This kind of file corresponds to a list of objects, each one being of the - * same type, i.e. each one is defined by the same Avro schema. - * @author Mateusz Kobos - */ -public class AvroPortType implements PortType { - - private final Schema schema; - - - public AvroPortType(Schema schema) { - this.schema = schema; - } - - @Override - public String getName() { - return schema.getFullName(); - } - - @Override - /** Simple check if the port types are exactly the same - * (as defined by the {@code equals} method). - * - * TODO: this should work in a more relaxed way - - * {@code this.accepts(other)} should be true if {@code this} - * describes a subset of structures defined in {@code other}. To be - * more precise: the JSON schema tree tree defined by {@code this} should - * form a sub-tree of the JSON schema tree defined by {@code other}. */ - public boolean accepts(PortType other) { - return this.equals(other); - } - - /** - * Two patterns are equal if their schemas are the same. - */ - @Override - public boolean equals(Object o){ - if(!(o instanceof AvroPortType)){ - return false; - } - AvroPortType other = (AvroPortType) o; - return this.schema.equals(other.schema); - } - - @Override - public int hashCode(){ - throw new NotImplementedException(); - } - - /** - * Returns avro schema. - * @return avro schema - */ - public Schema getSchema() { - return schema; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java deleted file mode 100644 index 5ae2d48c2..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/porttype/PortType.java +++ /dev/null @@ -1,33 +0,0 @@ -package eu.dnetlib.dhp.common.java.porttype; - -/** - * Type of the port. This is used to specify what kind of data is - * accepted on a certain input port or produced on a certain output port - * of a workflow node. - * - * @author Mateusz Kobos - * - */ -public interface PortType { - - String getName(); - - /** - * This should be used to check whether data produced by a workflow node - * conforms to the data consumed by other workflow node. - * In a scenario when A produces certain data on a port p and B consumes - * this data on a port q, type(q).accepts(type(p)) has to be true. - * - * @return {@code true} if {@code this} port type is a more general - * version of the {@code other} port type, - * or as an alternative explanation: {@code other} is a subset of - * {@code this}, i.e. {@code other} has at least all the properties present - * in {@code this} (and possibly some others). 
This is analogous to a - * situation in object-oriented programming, where in order for assignment - * operation {@code this = other} to work, the type of {@code this} has to - * accept type of {@code other}, or in other words {@code other} has to - * inherit from {@code this}, or in yet other words: {@code other} has to - * conform to {@code this}. - */ - boolean accepts(PortType other); -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java deleted file mode 100644 index d5d38fe29..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java +++ /dev/null @@ -1,149 +0,0 @@ -package eu.dnetlib.dhp.common.lock; - -import java.security.InvalidParameterException; -import java.util.Collections; -import java.util.Map; -import java.util.concurrent.Semaphore; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.ha.ZKFailoverController; -import org.apache.log4j.Logger; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.Watcher.Event; -import org.apache.zookeeper.ZooDefs; -import org.apache.zookeeper.ZooKeeper; - -import com.google.common.base.Preconditions; -import com.google.common.base.Stopwatch; - -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.porttype.PortType; - -/** - * Zookeeper lock managing process. Blocks until lock is released. - * - * @author mhorst - * - */ -public class LockManagingProcess implements eu.dnetlib.dhp.common.java.Process { - - public static final String DEFAULT_ROOT_NODE = "/cache"; - - public static final String NODE_SEPARATOR = "/"; - - public static final String PARAM_ZK_SESSION_TIMEOUT = "zk_session_timeout"; - - public static final String PARAM_NODE_ID = "node_id"; - - public static final String PARAM_LOCK_MODE = "mode"; - - public static enum LockMode { - obtain, - release - } - - public static final int DEFAULT_SESSION_TIMEOUT = 60000; - - public static final Logger log = Logger.getLogger(LockManagingProcess.class); - - @Override - public Map getInputPorts() { - return Collections.emptyMap(); - } - - @Override - public Map getOutputPorts() { - return Collections.emptyMap(); - } - - @Override - public void run(PortBindings portBindings, Configuration conf, - Map parameters) throws Exception { - - Preconditions.checkArgument(parameters.containsKey(PARAM_NODE_ID), "node id not provided!"); - Preconditions.checkArgument(parameters.containsKey(PARAM_LOCK_MODE), "lock mode not provided!"); - - String zkConnectionString = conf.get(ZKFailoverController.ZK_QUORUM_KEY); - Preconditions.checkArgument(StringUtils.isNotBlank(zkConnectionString), - "zookeeper quorum is unknown, invalid '%s' property value: %s", ZKFailoverController.ZK_QUORUM_KEY, zkConnectionString); - - int sessionTimeout = parameters.containsKey(PARAM_ZK_SESSION_TIMEOUT)? 
- Integer.valueOf(parameters.get(PARAM_ZK_SESSION_TIMEOUT)) : DEFAULT_SESSION_TIMEOUT; - - final ZooKeeper zooKeeper = new ZooKeeper(zkConnectionString, sessionTimeout, (e) -> { - // we are not interested in generic events - }); - -// initializing root node if does not exist - if (zooKeeper.exists(DEFAULT_ROOT_NODE, false) == null) { - log.info("initializing root node: " + DEFAULT_ROOT_NODE); - zooKeeper.create(DEFAULT_ROOT_NODE, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - log.info("root node initialized"); - } - - final String nodePath = generatePath(parameters.get(PARAM_NODE_ID), DEFAULT_ROOT_NODE); - - final Semaphore semaphore = new Semaphore(1); - semaphore.acquire(); - - switch(LockMode.valueOf(parameters.get(PARAM_LOCK_MODE))) { - case obtain: { - obtain(zooKeeper, nodePath, semaphore); - break; - } - case release: { - release(zooKeeper, nodePath); - break; - } - default: { - throw new InvalidParameterException("unsupported lock mode: " + parameters.get(PARAM_LOCK_MODE)); - } - } - } - - // ------------------------- PRIVATE -------------------------- - - private void obtain(final ZooKeeper zooKeeper, final String nodePath, final Semaphore semaphore) throws KeeperException, InterruptedException { - log.info("trying to obtain lock: " + nodePath); - if (zooKeeper.exists(nodePath, (event) -> { - if (Event.EventType.NodeDeleted == event.getType()) { - try { - log.info(nodePath + " lock release detected"); - log.info("creating new lock instance: " + nodePath + "..."); - zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - log.info("lock" + nodePath + " created"); - semaphore.release(); - } catch (KeeperException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - }) == null) { - log.info("lock not found, creating new lock instance: " + nodePath); - zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - log.info("lock" + nodePath + " created"); - semaphore.release(); - } else { - // waiting until node is removed by other lock manager - log.info("waiting until lock is released"); - Stopwatch timer = new Stopwatch().start(); - semaphore.acquire(); - log.info("lock released, waited for " + timer.elapsedMillis() + " ms"); - semaphore.release(); - } - } - - private void release(final ZooKeeper zooKeeper, final String nodePath) throws InterruptedException, KeeperException { - log.info("removing lock" + nodePath + "..."); - zooKeeper.delete(nodePath, -1); - log.info("lock" + nodePath + " removed"); - } - - private static final String generatePath(String nodeId, String rootNode) { - return rootNode + NODE_SEPARATOR + nodeId.replace('/', '_'); - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java deleted file mode 100644 index 71599277a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/OozieClientFactory.java +++ /dev/null @@ -1,24 +0,0 @@ -package eu.dnetlib.dhp.common.oozie; - -import org.apache.oozie.client.OozieClient; - -/** - * Factory of {@link OozieClient} - * - * @author madryk - */ -public class OozieClientFactory { - - - //------------------------ LOGIC -------------------------- - - /** - * Returns {@link OozieClient} object used for communication with oozie - */ - public OozieClient createOozieClient(String oozieUrl) { - - OozieClient oozieClient = new OozieClient(oozieUrl); 
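- // plain OozieClient performs no authentication; a Kerberos-secured
- // cluster would need org.apache.oozie.client.AuthOozieClient instead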
- - return oozieClient; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java deleted file mode 100644 index 6b7fe2975..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java +++ /dev/null @@ -1,76 +0,0 @@ -package eu.dnetlib.dhp.common.oozie.property; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.OutputStream; -import java.util.Collections; -import java.util.Map; -import java.util.Properties; - -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.Process; -import eu.dnetlib.dhp.common.java.porttype.PortType; -import org.apache.hadoop.conf.Configuration; - -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; - -/** - * This process is a solution for setting dynamic properties in oozie workflow definition. - * - * Expects three parameters to be provided: the first 'condition' parameter is boolean value - * based on which either first 'inCaseOfTrue' or second 'elseCase' parameter value is set as - * the 'result' property. - * - * This can be understood as the: - * - * condition ? inCaseOfTrue : elseCase - * - * java syntax equivalent. - * - * @author mhorst - * - */ -public class ConditionalPropertySetter implements Process { - - public static final String PARAM_CONDITION = "condition"; - public static final String PARAM_INCASEOFTRUE = "inCaseOfTrue"; - public static final String PARAM_ELSECASE = "elseCase"; - - public static final String OUTPUT_PROPERTY_RESULT = "result"; - - @Override - public Map getInputPorts() { - return Collections.emptyMap(); - } - - @Override - public Map getOutputPorts() { - return Collections.emptyMap(); - } - - @Override - public void run(PortBindings portBindings, Configuration conf, - Map parameters) throws Exception { - - String condition = parameters.get(PARAM_CONDITION); - if (condition == null) { - throw new RuntimeException("unable to make decision: " + - PARAM_CONDITION + " parameter was not set!"); - } - - Properties props = new Properties(); - props.setProperty(OUTPUT_PROPERTY_RESULT, - Boolean.parseBoolean(condition)? 
- parameters.get(PARAM_INCASEOFTRUE): - parameters.get(PARAM_ELSECASE)); - OutputStream os = new FileOutputStream( - new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME))); - try { - props.store(os, ""); - } finally { - os.close(); - } - - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java deleted file mode 100644 index 78da47f12..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufConverter.java +++ /dev/null @@ -1,12 +0,0 @@ -package eu.dnetlib.dhp.common.protobuf; - -import com.google.protobuf.Message; -import org.apache.avro.generic.IndexedRecord; - -/** - * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl) - */ -public interface AvroToProtoBufConverter { - String convertIntoKey(IN datum); - OUT convertIntoValue(IN datum); -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java deleted file mode 100644 index 00a318aff..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/protobuf/AvroToProtoBufOneToOneMapper.java +++ /dev/null @@ -1,62 +0,0 @@ -package eu.dnetlib.dhp.common.protobuf; - -import com.google.protobuf.Message; -import org.apache.avro.generic.IndexedRecord; -import org.apache.avro.mapred.AvroKey; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.log4j.Logger; - -import java.io.IOException; - -/** - * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl) - */ -public class AvroToProtoBufOneToOneMapper - extends Mapper, NullWritable, Text, BytesWritable> { - private static final String CONVERTER_CLASS_PROPERTY = "converter_class"; - private static final Logger log = Logger.getLogger(AvroToProtoBufOneToOneMapper.class); - - private final Text keyWritable = new Text(); - private final BytesWritable valueWritable = new BytesWritable(); - private AvroToProtoBufConverter converter; - - @SuppressWarnings("unchecked") - @Override - public void setup(Context context) throws IOException, InterruptedException { - Class converterClass = context.getConfiguration().getClass(CONVERTER_CLASS_PROPERTY, null); - - if (converterClass == null) { - throw new IOException("Please specify " + CONVERTER_CLASS_PROPERTY); - } - - try { - converter = (AvroToProtoBufConverter) converterClass.newInstance(); - } catch (ClassCastException e) { - throw new IOException( - "Class specified in " + CONVERTER_CLASS_PROPERTY + " doesn't implement AvroToProtoBufConverter", e); - } catch (Exception e) { - throw new IOException( - "Could not instantiate specified AvroToProtoBufConverter class, " + converterClass, e); - } - } - - @Override - public void map(AvroKey avro, NullWritable ignore, Context context) - throws IOException, InterruptedException { - String key = null; - try { - key = converter.convertIntoKey(avro.datum()); - keyWritable.set(key); - - byte[] value = converter.convertIntoValue(avro.datum()).toByteArray(); - valueWritable.set(value, 0, value.length); - - context.write(keyWritable, valueWritable); - } catch (Exception e) { - log.error("Error" + (key != null ? 
" while processing " + key : ""), e); - } - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java deleted file mode 100644 index 3c301fa8d..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java +++ /dev/null @@ -1,32 +0,0 @@ -package eu.dnetlib.dhp.common.report; - -import eu.dnetlib.dhp.common.schemas.ReportEntry; -import eu.dnetlib.dhp.common.schemas.ReportEntryType; - -/** - * Factory of {@link ReportEntry} objects. - * - * @author madryk - */ -public final class ReportEntryFactory { - - // ----------------------- CONSTRUCTORS ----------------------------- - - private ReportEntryFactory() {} - - // ----------------------- LOGIC ------------------------------------ - - /** - * Creates {@link ReportEntry} with {@link ReportEntryType#COUNTER} type - */ - public static ReportEntry createCounterReportEntry(String key, long count) { - return new ReportEntry(key, ReportEntryType.COUNTER, String.valueOf(count)); - } - - /** - * Creates {@link ReportEntry} with {@link ReportEntryType#DURATION} type - */ - public static ReportEntry createDurationReportEntry(String key, long duration) { - return new ReportEntry(key, ReportEntryType.DURATION, String.valueOf(duration)); - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java deleted file mode 100644 index 1ed3dfb08..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportGenerator.java +++ /dev/null @@ -1,110 +0,0 @@ -package eu.dnetlib.dhp.common.report; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import com.google.common.collect.Lists; - -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.Process; -import eu.dnetlib.dhp.common.java.io.DataStore; -import eu.dnetlib.dhp.common.java.io.FileSystemPath; -import eu.dnetlib.dhp.common.java.porttype.AvroPortType; -import eu.dnetlib.dhp.common.java.porttype.PortType; -import eu.dnetlib.dhp.common.schemas.ReportEntry; - -/** - * Java workflow node process for building report.
- * It writes report properties into an Avro datastore of {@link ReportEntry}s
- * at the location specified in the output port.
- * A report property name must start with the report. prefix to
- * be included in the output datastore.
- *
- * Usage example:
- * <pre>
- * {@code
- * <action name="...">
- *     <java>
- *         <main-class>eu.dnetlib.dhp.common.java.ProcessWrapper</main-class>
- *         <arg>eu.dnetlib.dhp.common.report.ReportGenerator</arg>
- *         <arg>-Preport.someProperty=someValue</arg>
- *         <arg>-Oreport=/report/path</arg>
- *     </java>
- *     ...
- * </action>
- * }
- * </pre>
- * Above example will produce avro datastore in /report/path - * with single {@link ReportEntry}. - * Where the {@link ReportEntry#getKey()} will be equal to someProperty and - * the {@link ReportEntry#getValue()} will be equal to someValue - * (notice the stripped report. prefix from the entry key). - * - * - * @author madryk - * - */ -public class ReportGenerator implements Process { - - private static final String REPORT_PORT_OUT_NAME = "report"; - - private static final String REPORT_PROPERTY_PREFIX = "report."; - - - //------------------------ LOGIC -------------------------- - - @Override - public Map getInputPorts() { - return Collections.emptyMap(); - } - - @Override - public Map getOutputPorts() { - return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$)); - } - - @Override - public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { - - Map entriesToReport = collectEntriesToReport(parameters); - - List avroReport = convertToAvroReport(entriesToReport); - - - FileSystem fs = FileSystem.get(conf); - - Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME); - - DataStore.create(avroReport, new FileSystemPath(fs, reportPath)); - - } - - - //------------------------ PRIVATE -------------------------- - - private Map collectEntriesToReport(Map parameters) { - - return parameters.entrySet().stream() - .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX)) - .map(x -> Pair.of(x.getKey().substring(REPORT_PROPERTY_PREFIX.length()), x.getValue())) - .collect(Collectors.toMap(e -> e.getLeft(), e -> e.getRight())); - - } - - private List convertToAvroReport(Map entriesToReport) { - - List avroReport = Lists.newArrayList(); - entriesToReport.forEach((key, value) -> avroReport.add(ReportEntryFactory.createCounterReportEntry(key, Long.valueOf(value)))); - - return avroReport; - } - - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java deleted file mode 100644 index 33b7f788c..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/spark/pipe/SparkPipeExecutor.java +++ /dev/null @@ -1,74 +0,0 @@ -package eu.dnetlib.dhp.common.spark.pipe; - -import java.io.Serializable; - -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.mapred.AvroKey; -import org.apache.hadoop.io.NullWritable; -import org.apache.spark.SparkFiles; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; - -import eu.dnetlib.dhp.common.utils.AvroGsonFactory; -import scala.Tuple2; - - -/** - * Executor of mapreduce scripts using spark pipes. - * It imitates hadoop streaming behavior. - * - * @author madryk - * - */ -public class SparkPipeExecutor implements Serializable { - - private static final long serialVersionUID = 1L; - - - //------------------------ LOGIC -------------------------- - - /** - * Imitates map part of hadoop streaming job. - * It executes provided script for every key in inputRecords rdd. - *
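- * Records are fed to the script through {@code RDD.pipe}, which writes the
- * string form of each element to the script's standard input.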

- * It is assumed that the provided script reads records from standard input (one line per record)
- * and writes each mapped record to standard output (also one line per record).
- * A mapped record can be a key/value pair. In that case the script should return the key and the value
- * split by a tab (\t) character on a single line.
- */
- public JavaPairRDD<String, String> doMap(JavaPairRDD<AvroKey<GenericRecord>, NullWritable> inputRecords, String scriptName, String args) {
-
- JavaRDD<String> mappedRecords = inputRecords.keys().pipe("python " + SparkFiles.get(scriptName) + " " + args);
-
- JavaPairRDD<String, String> outputRecords = mappedRecords
- .mapToPair(line -> {
- String[] splittedPair = line.split("\t");
- return new Tuple2<String, String>(splittedPair[0], (splittedPair.length == 1) ? null : splittedPair[1]);
- });
-
- return outputRecords;
- }
-
- /**
- * Imitates the reduce part of a hadoop streaming job.
- *

- * It is assumed that provided script will read records from standard input (one line for one record) - * and group records with the same key into single record (reduce). - * Method assures that all input records with the same key will be transfered in adjacent lines. - * Reduced records should be written by script into standard output (one line for one record). - * Reduced records must be json strings of class provided as argument. - */ - public JavaPairRDD, NullWritable> doReduce(JavaPairRDD inputRecords, String scriptName, String args, Class outputClass) { - - JavaRDD reducedRecords = inputRecords.sortByKey() - .map(record -> record._1 + ((record._2 == null) ? "" : ("\t" + record._2))) - .pipe("python " + SparkFiles.get(scriptName) + " " + args); - - JavaPairRDD, NullWritable> outputRecords = reducedRecords - .map(recordString -> AvroGsonFactory.create().fromJson(recordString, outputClass)) - .mapToPair(record -> new Tuple2, NullWritable>(new AvroKey<>(record), NullWritable.get())); - - return outputRecords; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java deleted file mode 100644 index 5889cf57a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java +++ /dev/null @@ -1,31 +0,0 @@ -package eu.dnetlib.dhp.common.string; - -/** - * Operations on {@link CharSequence} - * - * @author Łukasz Dumiszewski -*/ - -public final class CharSequenceUtils { - - - //------------------------ CONSTRUCTORS -------------------------- - - private CharSequenceUtils() { - throw new IllegalStateException("may not be initialized"); - } - - - //------------------------ LOGIC -------------------------- - - /** - * Converts the given {@link CharSequence} value to {@link String} by using {@link CharSequence#toString()}. - * Returns empty string if value is null. - */ - public static String toStringWithNullToEmpty(CharSequence value) { - - return value == null? "": value.toString(); - - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java deleted file mode 100644 index b69ed3419..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * This file is part of CoAnSys project. - * Copyright (c) 2012-2015 ICM-UW - * - * CoAnSys is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - - * CoAnSys is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with CoAnSys. If not, see . - */ - -package eu.dnetlib.dhp.common.string; - -import java.text.Normalizer; -import java.util.HashMap; -import java.util.Map; - -/** - * Mapping to the basic Latin alphabet (a-z, A-Z). In most cases, a character is - * mapped to the closest visual form, rather than functional one, e.g.: "ö" is - * mapped to "o" rather than "oe", and "đ" is mapped to "d" rather than "dj" or - * "gj". 
Notable exceptions include: "ĸ" mapped to "q", "ß" mapped to "ss", and - * "Þ", "þ" mapped to "Y", "y". - * - *

- * Each character is processed as follows:
- * <ol>
- *   <li>the character is compatibility decomposed,</li>
- *   <li>all the combining marks are removed,</li>
- *   <li>the character is compatibility composed,</li>
- *   <li>additional "manual" substitutions are applied.</li>
- * </ol>

All the characters from the "Latin-1 Supplement" and "Latin Extended-A" - * Unicode blocks are mapped to the "Basic Latin" block. Characters from other - * alphabets are generally left intact, although the decomposable ones may be - * affected by the procedure.
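- * For example, {@code removeDiacritics("żółć")} returns {@code "zolc"}: the combining
- * marks of "ż", "ó" and "ć" are dropped during decomposition, while "ł" is replaced
- * through the manual substitution table.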

- * - * @author Lukasz Bolikowski (bolo@icm.edu.pl) - * - * @author Łukasz Dumiszewski /just copied from coansys-commons/ - * - */ -public final class DiacriticsRemover { - - private static final Character[] from = { - 'Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', - 'ħ', 'ı', 'ĸ', 'Ł', 'ł', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ'}; - private static final String[] to = { - "AE", "D", "O", "Y", "ss", "ae", "d", "o", "y", "D", "d", "H", - "h", "i", "q", "L", "l", "N", "n", "OE", "oe", "T", "t"}; - - private static Map lookup = buildLookup(); - - - //------------------------ CONSTRUCTORS ------------------- - - - private DiacriticsRemover() {} - - - //------------------------ LOGIC -------------------------- - - - /** - * Removes diacritics from a text. - * - * @param text Text to process. - * @return Text without diacritics. - */ - public static String removeDiacritics(String text) { - if (text == null) { - return null; - } - - String tmp = Normalizer.normalize(text, Normalizer.Form.NFKD); - - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < tmp.length(); i++) { - Character ch = tmp.charAt(i); - if (Character.getType(ch) == Character.NON_SPACING_MARK) { - continue; - } - - if (lookup.containsKey(ch)) { - builder.append(lookup.get(ch)); - } else { - builder.append(ch); - } - } - - return builder.toString(); - } - - - //------------------------ PRIVATE -------------------------- - - private static Map buildLookup() { - if (from.length != to.length) { - throw new IllegalStateException(); - } - - Map _lookup = new HashMap(); - for (int i = 0; i < from.length; i++) { - _lookup.put(from[i], to[i]); - } - - return _lookup; - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java deleted file mode 100644 index bae64ae38..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * This file is part of CoAnSys project. - * Copyright (c) 2012-2015 ICM-UW - * - * CoAnSys is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - - * CoAnSys is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with CoAnSys. If not, see . - */ -package eu.dnetlib.dhp.common.string; - -import java.io.Serializable; -import java.util.List; - -import org.apache.commons.lang3.StringUtils; - -import com.google.common.collect.ImmutableList; - -/** - * An implementation of {@link StringNormalizer} that normalizes strings for non-strict comparisons - * in which one does not care about characters other than letters and digits or about differently written diacritics. 
- * - * @author Łukasz Dumiszewski - * - */ -public final class LenientComparisonStringNormalizer implements StringNormalizer, Serializable { - - - private static final long serialVersionUID = 1L; - - - private List whitelistCharacters; - - - //------------------------ CONSTRUCTORS -------------------------- - - public LenientComparisonStringNormalizer() { - this(ImmutableList.of()); - } - - /** - * @param whitelistCharacters - non alphanumeric characters that will not be removed - * during normalization - */ - public LenientComparisonStringNormalizer(List whitelistCharacters) { - this.whitelistCharacters = whitelistCharacters; - } - - - //------------------------ LOGIC -------------------------- - - - - /** - * Normalizes the given value.
- * The normalized strings are better suited for non-strict comparisons, in which one does not care about
- * characters that are neither letters nor digits, about accidental spaces, or about differently written diacritics.
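- * For example, with no extra whitelist characters {@code normalize("Déjà-Vu  2")}
- * returns {@code "deja vu 2"}.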

- * This method:
- * <ul>
- *   <li>replaces all characters that are not letters or digits with spaces (except those on the whitelist characters list),</li>
- *   <li>replaces white spaces with spaces,</li>
- *   <li>trims,</li>
- *   <li>compacts multi-space gaps to one-space gaps,</li>
- *   <li>removes diacritics,</li>
- *   <li>changes characters to lower case.</li>
- * </ul>
- * Returns "" if the passed value is null or blank - * - * @param value the string to normalize - * @see DiacriticsRemover#removeDiacritics(String, boolean) - * - * - */ - public String normalize(String value) { - - if (StringUtils.isBlank(value)) { - - return ""; - - } - - - String result = value; - - result = DiacriticsRemover.removeDiacritics(result); - - result = removeNonLetterDigitCharacters(result); - - result = result.toLowerCase(); - - result = result.trim().replaceAll(" +", " "); - - return result; - } - - - - - //------------------------ PRIVATE -------------------------- - - - private String removeNonLetterDigitCharacters(final String value) { - - StringBuilder sb = new StringBuilder(); - - for (int i = 0; i < value.length(); ++i) { - - char c = value.charAt(i); - - if (Character.isLetterOrDigit(c) || whitelistCharacters.contains(c)) { - sb.append(c); - } else { - sb.append(' '); - } - } - - return sb.toString(); - } - - - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java deleted file mode 100644 index 6e28422bc..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/StringNormalizer.java +++ /dev/null @@ -1,16 +0,0 @@ -package eu.dnetlib.dhp.common.string; - -/** - * String normalizer. - * - * @author Łukasz Dumiszewski - * - */ -public interface StringNormalizer { - - /** - * Normalizes the given string value. - */ - String normalize(String value); - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java deleted file mode 100644 index 7fcc0506a..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroGsonFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -package eu.dnetlib.dhp.common.utils; - -import java.lang.reflect.Type; - -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonParseException; - -/** - * Factory for gson object that supports serializing avro generated classes - * - * @author madryk - * - */ -public final class AvroGsonFactory { - - //------------------------ CONSTRUCTORS ------------------- - - - private AvroGsonFactory() {} - - - //------------------------ LOGIC -------------------------- - - public static Gson create() { - GsonBuilder builder = new GsonBuilder(); - - builder.registerTypeAdapter(CharSequence.class, new CharSequenceDeserializer()); - - return builder.create(); - } - - public static class CharSequenceDeserializer implements JsonDeserializer { - - @Override - public CharSequence deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context) - throws JsonParseException { - return json.getAsString(); - } - - } -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java deleted file mode 100644 index 44dd218b5..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/AvroUtils.java +++ /dev/null @@ -1,77 +0,0 @@ -package eu.dnetlib.dhp.common.utils; - -import java.lang.reflect.Field; - -import org.apache.avro.Schema; - -/** - * - * @author Mateusz Kobos - * - */ -public final class AvroUtils { - - public final static String primitiveTypePrefix = "org.apache.avro.Schema.Type."; - - - 
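- // e.g. "org.apache.avro.Schema.Type.STRING" resolves to the primitive string schema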
//------------------------ CONSTRUCTORS ------------------- - - - private AvroUtils() {} - - - //------------------------ LOGIC -------------------------- - - - /** - * For a given name of a class generated from Avro schema return - * a JSON schema. - * - * Apart from a name of a class you can also give a name of one of enums - * defined in {@link org.apache.avro.Schema.Type}; in such case an - * appropriate primitive type will be returned. - * - * @param typeName fully qualified name of a class generated from Avro schema, - * e.g. {@code eu.dnetlib.dhp.common.avro.Person}, - * or a fully qualified name of enum defined by - * {@link org.apache.avro.Schema.Type}, - * e.g. {@link org.apache.avro.Schema.Type.STRING}. - * @return JSON string - */ - public static Schema toSchema(String typeName) { - Schema schema = null; - if(typeName.startsWith(primitiveTypePrefix)){ - String shortName = typeName.substring( - primitiveTypePrefix.length(), typeName.length()); - schema = getPrimitiveTypeSchema(shortName); - } else { - schema = getAvroClassSchema(typeName); - } - return schema; - } - - private static Schema getPrimitiveTypeSchema(String shortName){ - Schema.Type type = Schema.Type.valueOf(shortName); - return Schema.create(type); - } - - private static Schema getAvroClassSchema(String className){ - try { - Class avroClass = Class.forName(className); - Field f = avroClass.getDeclaredField("SCHEMA$"); - return (Schema) f.get(null); - } catch (ClassNotFoundException e) { - throw new RuntimeException( - "Class \""+className+"\" does not exist", e); - } catch (SecurityException e) { - throw new RuntimeException(e); - } catch (NoSuchFieldException e) { - throw new RuntimeException(e); - } catch (IllegalArgumentException e) { - throw new RuntimeException(e); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java deleted file mode 100644 index 152271ab7..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/ByteArrayUtils.java +++ /dev/null @@ -1,45 +0,0 @@ -package eu.dnetlib.dhp.common.utils; - -/** - * Byte array utility class. - * @author mhorst - * - */ -public final class ByteArrayUtils { - - //------------------------ CONSTRUCTORS ------------------- - - private ByteArrayUtils() {} - - //------------------------ LOGIC -------------------------- - - /** - * Does this byte array begin with match array content? - * @param source Byte array to examine - * @param match Byte array to locate in source - * @return true If the starting bytes are equal - */ - public static boolean startsWith(byte[] source, byte[] match) { - return startsWith(source, 0, match); - } - - /** - * Does this byte array begin with match array content? 
- * @param source Byte array to examine - * @param offset An offset into the source array - * @param match Byte array to locate in source - * @return true If the starting bytes are equal - */ - public static boolean startsWith(byte[] source, int offset, byte[] match) { - if (match.length > (source.length - offset)) { - return false; - } - for (int i = 0; i < match.length; i++) { - if (source[offset + i] != match[i]) { - return false; - } - } - return true; - } - -} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java deleted file mode 100644 index 1e6e04149..000000000 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/utils/EmptyDatastoreVerifierProcess.java +++ /dev/null @@ -1,89 +0,0 @@ -package eu.dnetlib.dhp.common.utils; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.security.InvalidParameterException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.Ports; -import eu.dnetlib.dhp.common.java.Process; -import eu.dnetlib.dhp.common.java.io.CloseableIterator; -import eu.dnetlib.dhp.common.java.io.DataStore; -import eu.dnetlib.dhp.common.java.io.FileSystemPath; -import eu.dnetlib.dhp.common.java.porttype.AnyPortType; -import eu.dnetlib.dhp.common.java.porttype.PortType; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; - -/** - * Simple process verifying whether given datastore is empty. - * @author mhorst - * - */ -public class EmptyDatastoreVerifierProcess implements Process { - - public static final String INPUT_PORT_NAME = "input"; - - public static final String DEFAULT_ENCODING = "UTF-8"; - - public static final String OUTPUT_PROPERTY_IS_EMPTY = "isEmpty"; - - /** - * Ports handled by this module. - */ - private final Ports ports; - - - // ------------------------ CONSTRUCTORS -------------------------- - - public EmptyDatastoreVerifierProcess() { -// preparing ports - Map input = new HashMap(); - input.put(INPUT_PORT_NAME, new AnyPortType()); - Map output = Collections.emptyMap(); - ports = new Ports(input, output); - } - - @Override - public Map getInputPorts() { - return ports.getInput(); - } - - @Override - public Map getOutputPorts() { - return ports.getOutput(); - } - - @Override - public void run(PortBindings portBindings, Configuration conf, Map parameters) throws Exception { - if (!portBindings.getInput().containsKey(INPUT_PORT_NAME)) { - throw new InvalidParameterException("missing input port!"); - } - - try (CloseableIterator closeableIt = getIterator(conf, portBindings.getInput().get(INPUT_PORT_NAME))) { - File file = new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); - Properties props = new Properties(); - props.setProperty(OUTPUT_PROPERTY_IS_EMPTY, Boolean.toString(!closeableIt.hasNext())); - try (OutputStream os = new FileOutputStream(file)) { - props.store(os, ""); - } - } - } - - /** - * Returns iterator over datastore. 
- */ - protected CloseableIterator getIterator(Configuration conf, Path path) throws IOException { - return DataStore.getReader(new FileSystemPath(FileSystem.get(conf), path)); - } - -} diff --git a/dhp-schemas/pom.xml b/dhp-schemas/pom.xml index 2c6e18f27..6ead99f42 100644 --- a/dhp-schemas/pom.xml +++ b/dhp-schemas/pom.xml @@ -13,50 +13,8 @@ dhp-schemas jar - - - org.apache.avro - avro - - - - - - - org.apache.avro - avro-maven-plugin - - - generate-sources - - schema - idl-protocol - - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - ${project.build.directory}/generated-sources/avro/ - - - - - - - + + diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl deleted file mode 100644 index 3bce821a4..000000000 --- a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/audit/Fault.avdl +++ /dev/null @@ -1,29 +0,0 @@ -@namespace("eu.dnetlib.dhp.audit.schemas") -protocol DHP { - - record Cause { -// generic cause code, root exception class name when derived from exception - string code; -// cause message - union { null , string } message = null; - } - - record Fault { -// input object identifier - string inputObjectId; -// fault creation timestamp - long timestamp; -// generic fault code, root exception class name when derived from exception - string code; -// fault message - union { null , string } message = null; -// stack trace - union { null , string } stackTrace = null; -// fault causes, array is indexed with cause depth - union { null , array } causes = null; -// Other supplementary data related to specific type of fault. -// See parameters description in oozie workflow.xml documentation of modules -// that use this structure for information what exactly can be stored as supplementary data. 
- union { null , map } supplementaryData = null; - } -} diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl deleted file mode 100644 index 99406b4f0..000000000 --- a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/common/ReportEntry.avdl +++ /dev/null @@ -1,16 +0,0 @@ -@namespace("eu.dnetlib.dhp.common.schemas") -protocol DHP{ - - enum ReportEntryType { - COUNTER, DURATION - } - - - record ReportEntry { - - string key; - ReportEntryType type; - string value; - - } -} diff --git a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl b/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl deleted file mode 100644 index 9ad5435fa..000000000 --- a/dhp-schemas/src/main/avro/eu/dnetlib/dhp/importer/NativeRecord.avdl +++ /dev/null @@ -1,21 +0,0 @@ -@namespace("eu.dnetlib.dhp.importer.schemas") -protocol DHP { - - enum RecordFormat { - XML, JSON - } - - record ImportedRecord { - - // record identifier - string id; - - RecordFormat format; - - // format name (OAF, OAI_DC, Datacite, etc) for which there is a parser implementation - string formatName; - - // record body - string body; - } -} diff --git a/dhp-spark-jobs/pom.xml b/dhp-spark-jobs/pom.xml new file mode 100644 index 000000000..323fa1d59 --- /dev/null +++ b/dhp-spark-jobs/pom.xml @@ -0,0 +1,26 @@ + + + 4.0.0 + + + eu.dnetlib.dhp + dhp + 1.0.0-SNAPSHOT + + dhp-spark-jobs + + + org.apache.spark + spark-core_2.11 + + + + org.apache.spark + spark-sql_2.11 + + + + + diff --git a/dhp-spark-jobs/src/main/java/eu/dnetlib/collection/GenerateNativeStoreSparkJob.java b/dhp-spark-jobs/src/main/java/eu/dnetlib/collection/GenerateNativeStoreSparkJob.java new file mode 100644 index 000000000..d21c0a18e --- /dev/null +++ b/dhp-spark-jobs/src/main/java/eu/dnetlib/collection/GenerateNativeStoreSparkJob.java @@ -0,0 +1,79 @@ +package eu.dnetlib.collection; + +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.Function; +import org.apache.spark.sql.*; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import scala.Tuple2; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import static org.apache.spark.sql.functions.array_contains; + +public class GenerateNativeStoreSparkJob { + + + public static void main(String[] args) { + + final SparkSession spark = SparkSession + .builder() + .appName("GenerateNativeStoreSparkJob") + .master("local[*]") + .getOrCreate(); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaPairRDD f = sc.sequenceFile("/home/sandro/Downloads/mdstore_oai", IntWritable.class, Text.class); + + String first = f.map(a -> a._2().toString()).first(); + + + final List fields = new ArrayList<>(); + + fields.add(DataTypes.createStructField("id", DataTypes.StringType, false)); + fields.add(DataTypes.createStructField("format", DataTypes.StringType, false)); + fields.add(DataTypes.createStructField("formatName", DataTypes.StringType, true)); + fields.add(DataTypes.createStructField("body", DataTypes.StringType, true)); + + JavaRDD mdRdd = f.map((Function, Row>) item -> RowFactory.create("" + item._1().get(), "xml", null, item._2().toString())); + + 
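+ // each sequence-file entry becomes one Row matching the field list built above:
+ // the IntWritable key is rendered as a string id, the format is hard-coded to
+ // "xml", formatName is left null and the Text payload becomes the record body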
final StructType schema = DataTypes.createStructType(fields); + Dataset ds = spark.createDataFrame(mdRdd, schema); + +// ds.write().save("/home/sandro/Downloads/test.parquet"); + + Publication p2 = new Publication(); + p2.setDates(Collections.singletonList("2018-09-09")); + p2.setTitles(Collections.singletonList("Titolo 2")); + p2.setIdentifiers(Collections.singletonList(new PID("pmID", "1234567"))); + + Publication p1 = new Publication(); + p1.setDates(Collections.singletonList("2018-09-09")); + p1.setTitles(Collections.singletonList("Titolo 1")); + p1.setIdentifiers(Collections.singletonList(new PID("doi", "1234567"))); + + + + + Encoder encoder = Encoders.bean(Publication.class); + + Dataset dp = spark.createDataset(Arrays.asList(p1,p2), encoder); + + + long count = dp.where(array_contains(new Column("identifiers.schema"), "doi")).count(); + + System.out.println("count = " + count); + + System.out.println(ds.count()); + + + } +} diff --git a/dhp-wf/dhp-wf-import/pom.xml b/dhp-wf/dhp-wf-import/pom.xml deleted file mode 100644 index 6bf4ba825..000000000 --- a/dhp-wf/dhp-wf-import/pom.xml +++ /dev/null @@ -1,105 +0,0 @@ - - - - eu.dnetlib.dhp - dhp-wf - 1.0.0-SNAPSHOT - - 4.0.0 - - dhp-wf-import - - - - - ${project.groupId} - dhp-common - ${project.version} - - - - ${project.groupId} - dhp-common - ${project.version} - test-jar - test - - - - ${project.groupId} - dhp-schemas - ${project.version} - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - - - org.apache.hadoop - hadoop-common - - - - com.googlecode.json-simple - json-simple - - - commons-cli - commons-cli - - - eu.dnetlib - dnet-objectstore-rmi - - - eu.dnetlib - cnr-rmi-api - - - eu.dnetlib - cnr-resultset-client - - - eu.dnetlib - dnet-openaireplus-mapping-utils - - - - org.springframework - spring-context - - - org.apache.cxf - cxf-rt-frontend-jaxws - - - com.google.code.gson - gson - - - - org.apache.spark - spark-core_2.10 - - - org.apache.spark - spark-sql_2.10 - - - com.databricks - spark-avro_2.10 - - - org.mongodb.spark - mongo-spark-connector_2.10 - - - - - - - diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java deleted file mode 100644 index 214d6691d..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java +++ /dev/null @@ -1,29 +0,0 @@ -package eu.dnetlib.dhp.wf.importer; - -import java.io.IOException; - -import org.apache.avro.file.DataFileWriter; - -/** - * {@link DataFileWriter} based record receiver. - * @author mhorst - * - */ -public class DataFileRecordReceiver implements RecordReceiver { - - private final DataFileWriter writer; - - /** - * Default constructor. 
- * @param writer - */ - public DataFileRecordReceiver(DataFileWriter writer) { - this.writer = writer; - } - - @Override - public void receive(T object) throws IOException { - this.writer.append(object); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java deleted file mode 100644 index 955f18065..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java +++ /dev/null @@ -1,50 +0,0 @@ -package eu.dnetlib.dhp.wf.importer; - -import java.io.IOException; - -import org.apache.avro.file.DataFileWriter; - -/** - * {@link DataFileWriter} based record receiver with counter of - * received records. - * - * @author madryk - */ -public class DataFileRecordReceiverWithCounter extends DataFileRecordReceiver { - - private long receivedCount = 0L; - - - //------------------------ CONSTRUCTORS -------------------------- - - /** - * Default constructor - * - * @param writer - writer of the received records - */ - public DataFileRecordReceiverWithCounter(DataFileWriter writer) { - super(writer); - } - - - //------------------------ GETTERS -------------------------- - - /** - * Returns number of received records - */ - public long getReceivedCount() { - return receivedCount; - } - - - //------------------------ LOGIC -------------------------- - - /** - * Receives passed record and increments the counter. - */ - @Override - public void receive(T record) throws IOException { - super.receive(record); - ++receivedCount; - } -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java deleted file mode 100644 index 40f673ee0..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java +++ /dev/null @@ -1,52 +0,0 @@ -package eu.dnetlib.dhp.wf.importer; - -/** - * Import realated workflow parameters. 
- * @author mhorst - * - */ -public final class ImportWorkflowRuntimeParameters { - - // parameter names - - public static final String IMPORT_INFERENCE_PROVENANCE_BLACKLIST = "import.inference.provenance.blacklist"; - public static final String IMPORT_SKIP_DELETED_BY_INFERENCE = "import.skip.deleted.by.inference"; - public static final String IMPORT_TRUST_LEVEL_THRESHOLD = "import.trust.level.threshold"; - public static final String IMPORT_APPROVED_DATASOURCES_CSV = "import.approved.datasources.csv"; - public static final String IMPORT_APPROVED_COLUMNFAMILIES_CSV = "import.approved.columnfamilies.csv"; - public static final String IMPORT_MERGE_BODY_WITH_UPDATES = "import.merge.body.with.updates"; - public static final String IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV = "import.content.approved.objectstores.csv"; - public static final String IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV = "import.content.blacklisted.objectstores.csv"; - - public static final String IMPORT_CONTENT_OBJECT_STORE_LOC = "import.content.object.store.location"; - public static final String IMPORT_CONTENT_OBJECT_STORE_IDS_CSV = "import.content.object.store.ids.csv"; - public static final String IMPORT_CONTENT_MAX_FILE_SIZE_MB = "import.content.max.file.size.mb"; - public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout"; - public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout"; - - public static final String IMPORT_MDSTORE_IDS_CSV = "import.mdstore.ids.csv"; - public static final String IMPORT_MDSTORE_SERVICE_LOCATION = "import.mdstore.service.location"; - public static final String IMPORT_MDSTORE_RECORD_MAXLENGTH = "import.mdstore.record.maxlength"; - - public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location"; - public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code"; - public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = "import.vocabulary.output.filename"; - - public static final String IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT = "import.resultset.client.read.timeout"; - public static final String IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT = "import.resultset.client.connection.timeout"; - public static final String IMPORT_RESULT_SET_PAGESIZE = "import.resultset.pagesize"; - - - public static final String HBASE_ENCODING = "hbase.table.encoding"; - - public static final String IMPORT_FACADE_FACTORY_CLASS = "import.facade.factory.classname"; - - // default values - - public static final String RESULTSET_READ_TIMEOUT_DEFAULT_VALUE = "60000"; - public static final String RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000"; - public static final String RESULTSET_PAGESIZE_DEFAULT_VALUE = "100"; - - private ImportWorkflowRuntimeParameters() {} - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java deleted file mode 100644 index c0a5e8950..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java +++ /dev/null @@ -1,14 +0,0 @@ -package eu.dnetlib.dhp.wf.importer; - -import java.io.IOException; - -/** - * Record receiver interface. 
- * @author mhorst - * - * @param - */ -public interface RecordReceiver { - - void receive(T object) throws IOException; -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java deleted file mode 100644 index 0a3cd6fb4..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java +++ /dev/null @@ -1,104 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import java.util.Map; - -import javax.xml.ws.BindingProvider; -import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder; - -import org.apache.log4j.Logger; - -import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; - -/** - * Abstract class utilized by all WebService facades. - * @author mhorst - * - */ -public abstract class AbstractResultSetAwareWebServiceFacade { - - private final Logger log = Logger.getLogger(this.getClass()); - - /** - * Web service. - */ - private final T service; - - /** - * ResultSet read timeout. - */ - private final long resultSetReadTimeout; - - /** - * ResultSet connection timeout. - */ - private final long resultSetConnectionTimeout; - - /** - * ResultSet page size. - */ - private final int resultSetPageSize; - - - //------------------------ CONSTRUCTORS ------------------- - - /** - * Instantiates underlying service. - * @param clazz webservice class - * @param serviceLocation webservice location - * @param serviceReadTimeout service read timeout - * @param serviceConnectionTimeout service connection timeout - * @param resultSetReadTimeout resultset read timeout - * @param resultSetConnectionTimeout resultset connection timeout - * @param resultSetPageSize resultset page size - */ - protected AbstractResultSetAwareWebServiceFacade(Class clazz, String serviceLocation, - long serviceReadTimeout, long serviceConnectionTimeout, - long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { - W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder(); - eprBuilder.address(serviceLocation); - eprBuilder.build(); - this.service = new JaxwsServiceResolverImpl().getService(clazz, eprBuilder.build()); - if (this.service instanceof BindingProvider) { - log.info(String.format("setting timeouts for %s: read timeout (%s) and connect timeout (%s)", - BindingProvider.class, serviceReadTimeout, serviceConnectionTimeout)); - final Map requestContext = ((BindingProvider) service).getRequestContext(); - - // can't be sure about which will be used. Set them all. 
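- // com.sun.xml.internal.ws.* targets the JDK-bundled Metro runtime,
- // com.sun.xml.ws.* the standalone Metro runtime,
- // and javax.xml.ws.client.* the portable JAX-WS property names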
- requestContext.put("com.sun.xml.internal.ws.request.timeout", serviceReadTimeout); - requestContext.put("com.sun.xml.internal.ws.connect.timeout", serviceConnectionTimeout); - - requestContext.put("com.sun.xml.ws.request.timeout", serviceReadTimeout); - requestContext.put("com.sun.xml.ws.connect.timeout", serviceConnectionTimeout); - - requestContext.put("javax.xml.ws.client.receiveTimeout", serviceReadTimeout); - requestContext.put("javax.xml.ws.client.connectionTimeout", serviceConnectionTimeout); - } - - this.resultSetReadTimeout = resultSetReadTimeout; - this.resultSetConnectionTimeout = resultSetConnectionTimeout; - this.resultSetPageSize = resultSetPageSize; - } - - - //------------------------ GETTERS ------------------------- - - public T getService() { - return service; - } - - - public long getResultSetReadTimeout() { - return resultSetReadTimeout; - } - - - public long getResultSetConnectionTimeout() { - return resultSetConnectionTimeout; - } - - - public int getResultSetPageSize() { - return resultSetPageSize; - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java deleted file mode 100644 index c156ae1cc..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java +++ /dev/null @@ -1,17 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -/** - * ISLookup service facade. - * - * @author mhorst - * - */ -public interface ISLookupFacade { - - /** - * Provides all profiles matching given query - * @param xPathQuery XPath query - */ - Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException; - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java deleted file mode 100644 index f50b02b98..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java +++ /dev/null @@ -1,17 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -/** - * MDStore service facade. - * - * @author mhorst - * - */ -public interface MDStoreFacade { - - /** - * Delivers all records for given MDStore identifier - * @param mdStoreId MDStore identifier - */ - Iterable<String> deliverMDRecords(String mdStoreId) throws ServiceFacadeException; - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java deleted file mode 100644 index 0e1aa19ef..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java +++ /dev/null @@ -1,19 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -/** - * ObjectStore service facade. - * - * @author mhorst - * - */ -public interface ObjectStoreFacade { - - /** - * Returns metadata records from given objectstore created in specified time range.
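All three facades expose the same shape: one method returning an `Iterable<String>` of XML payloads and throwing `ServiceFacadeException`. That makes consumers easy to unit-test; a throwaway in-memory implementation might look as follows (class name and sample records are made up):

```java
import java.util.Arrays;
import java.util.List;

import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeException;

// In-memory MDStoreFacade for tests: satisfies the Iterable<String> contract
// without a live MDStore service.
public class InMemoryMDStoreFacade implements MDStoreFacade {

    private final List<String> records = Arrays.asList(
            "<record><header><objIdentifier>id-1</objIdentifier></header></record>",
            "<record><header><objIdentifier>id-2</objIdentifier></header></record>");

    @Override
    public Iterable<String> deliverMDRecords(String mdStoreId) throws ServiceFacadeException {
        return records;
    }
}
```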
- * @param objectStoreId object store identifier - * @param from from time in millis - * @param until until time in millis - */ - Iterable<String> deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException; - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java deleted file mode 100644 index 9776306fa..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java +++ /dev/null @@ -1,27 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -/** - * Service facade generic exception. - * - * @author mhorst - * - */ -public class ServiceFacadeException extends Exception { - - private static final long serialVersionUID = 0L; - - //------------------------ CONSTRUCTORS ------------------- - - public ServiceFacadeException(String message, Throwable cause) { - super(message, cause); - } - - public ServiceFacadeException(String message) { - super(message); - } - - public ServiceFacadeException(Throwable cause) { - super(cause); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java deleted file mode 100644 index 94b9307c4..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java +++ /dev/null @@ -1,20 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import java.util.Map; - -/** - * Generic service facade factory. All implementations must be instantiable with no-argument constructor. - * - * @author mhorst - * - */ -public interface ServiceFacadeFactory<T> { - - /** - * Creates service of given type configured with parameters. - * - * @param parameters service configuration - * - */ - T instantiate(Map<String, String> parameters); -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java deleted file mode 100644 index 53a76d761..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java +++ /dev/null @@ -1,80 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_FACADE_FACTORY_CLASS; - -import java.lang.reflect.Constructor; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; - -import org.apache.hadoop.conf.Configuration; - -import com.google.common.collect.ImmutableMap; - -import eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters; - -/** - * Service facade utility methods. - * @author mhorst - * - */ -public final class ServiceFacadeUtils { - - //------------------------ CONSTRUCTORS ------------------- - - private ServiceFacadeUtils() {} - - //------------------------ LOGIC -------------------------- - - /** - * Instantiates service based on provided parameters. - * - * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} parameter. - * Other parameters will be used by factory itself. Factory must be instantiable with no-argument constructor.
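`ServiceFacadeFactory` is the SPI that `ServiceFacadeUtils` (the next file in this diff) instantiates reflectively, which is why the no-argument-constructor requirement matters. A hypothetical factory producing the in-memory facade sketched above:

```java
import java.util.Map;

import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeFactory;

// Hypothetical factory: the implicit public no-arg constructor satisfies the
// reflective instantiation contract; configuration is read from the map.
public class InMemoryMDStoreFacadeFactory implements ServiceFacadeFactory<MDStoreFacade> {

    @Override
    public MDStoreFacade instantiate(Map<String, String> parameters) {
        // a production factory validates its keys here, as the WebService*FacadeFactory
        // classes below do with Preconditions.checkArgument(...)
        return new InMemoryMDStoreFacade();
    }
}
```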
- * - * @param parameters set of parameters required for service instantiation - * - */ - public static <T> T instantiate(Map<String, String> parameters) throws ServiceFacadeException { - String serviceFactoryClassName = parameters.get(IMPORT_FACADE_FACTORY_CLASS); - if (StringUtils.isBlank(serviceFactoryClassName)) { - throw new ServiceFacadeException("unknown service facade factory, no " + IMPORT_FACADE_FACTORY_CLASS + " parameter provided!"); - } - try { - Class<?> clazz = Class.forName(serviceFactoryClassName); - Constructor<?> constructor = clazz.getConstructor(); - @SuppressWarnings("unchecked") - ServiceFacadeFactory<T> serviceFactory = (ServiceFacadeFactory<T>) constructor.newInstance(); - return serviceFactory.instantiate(parameters); - } catch (Exception e) { - throw new ServiceFacadeException("exception occurred while instantiating service by facade factory: " + IMPORT_FACADE_FACTORY_CLASS, e); - } - - } - - /** - * Instantiates service based on provided configuration. - * - * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} configuration entry. - * Other parameters will be used by factory itself. Factory must be instantiable with no-argument constructor. - * - * @param config set of configuration entries required for service instantiation - */ - public static <T> T instantiate(Configuration config) throws ServiceFacadeException { - return instantiate(buildParameters(config)); - } - - - // ------------------------ PRIVATE -------------------------- - - /** - * Converts configuration entries into plain map. - */ - private static Map<String, String> buildParameters(Configuration config) { - ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); - for (Map.Entry<String, String> entry : config) { - builder.put(entry); - } - return builder.build(); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java deleted file mode 100644 index 7c787f2f8..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java +++ /dev/null @@ -1,55 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import java.util.Collections; - -import org.apache.log4j.Logger; - -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; -import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; - -/** - * WebService based ISLookup facade.
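End to end, a caller only supplies a parameter map naming the factory class plus whatever keys that factory expects; the compiler infers the facade type from the assignment target. A usage sketch (the service URL and mdstore identifier are placeholders):

```java
import java.util.HashMap;
import java.util.Map;

import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeException;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeUtils;

public class ServiceFacadeUtilsUsage {

    public static void main(String[] args) throws ServiceFacadeException {
        Map<String, String> params = new HashMap<>();
        // mandatory: the factory class, instantiated reflectively by ServiceFacadeUtils
        params.put("import.facade.factory.classname",
                "eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory");
        // interpreted by the factory itself (placeholder location)
        params.put("import.mdstore.service.location", "http://localhost:8280/services/mdstore");

        MDStoreFacade facade = ServiceFacadeUtils.instantiate(params);
        for (String record : facade.deliverMDRecords("some-mdstore-id")) {
            System.out.println(record.length());
        }
    }
}
```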
- * - * @author mhorst - * - */ -public class WebServiceISLookupFacade extends AbstractResultSetAwareWebServiceFacade<ISLookUpService> implements ISLookupFacade { - - private static final Logger log = Logger.getLogger(WebServiceISLookupFacade.class); - - - //------------------------ CONSTRUCTORS ------------------- - - /** - * @param serviceLocation ISLookup service location - * @param serviceReadTimeout service read timeout - * @param serviceConnectionTimeout service connection timeout - * @param resultSetReadTimeout result set providing ISLookup results read timeout - * @param resultSetConnectionTimeout result set connection timeout - * @param resultSetPageSize result set data chunk size - */ - public WebServiceISLookupFacade(String serviceLocation, - long serviceReadTimeout, long serviceConnectionTimeout, - long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { - super(ISLookUpService.class, serviceLocation, - serviceReadTimeout, serviceConnectionTimeout, - resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); - } - - //------------------------ LOGIC -------------------------- - - @Override - public Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException { - try { - return getService().quickSearchProfile(xPathQuery); - } catch (ISLookUpDocumentNotFoundException e) { - log.error("unable to find profile for query: " + xPathQuery, e); - return Collections.emptyList(); - } catch (ISLookUpException e) { - throw new ServiceFacadeException("searching profiles in ISLookup failed with query '" + xPathQuery + "'", e); - } - - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java deleted file mode 100644 index 6557ead94..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; - -import java.util.Map; - -import com.google.common.base.Preconditions; - -import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; - -/** - * WebService ISLookup service facade factory.
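Note the asymmetric error handling in `searchProfile`: a missing profile degrades to an empty iterable, while any other lookup failure surfaces as `ServiceFacadeException`. A small consumer sketch (the XPath is an illustrative guess at a D-Net profile query, not taken from this diff):

```java
import eu.dnetlib.dhp.wf.importer.facade.ISLookupFacade;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeException;

public class ISLookupQueryExample {

    // Prints all matching profiles; an unmatched query simply prints nothing,
    // because the facade maps "document not found" to an empty result.
    static void printProfiles(ISLookupFacade facade) throws ServiceFacadeException {
        String xPathQuery =
                "//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value = 'VocabularyDSResourceType']";
        for (String profile : facade.searchProfile(xPathQuery)) {
            System.out.println(profile);
        }
    }
}
```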
- * - * @author mhorst - * - */ -public class WebServiceISLookupFacadeFactory implements ServiceFacadeFactory { - - - //------------------------ LOGIC -------------------------- - - @Override - public ISLookupFacade instantiate(Map parameters) { - Preconditions.checkArgument(parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION), - "unknown ISLookup service location: no parameter provided: '%s'", IMPORT_ISLOOKUP_SERVICE_LOCATION); - - return new WebServiceISLookupFacade(parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), - Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java deleted file mode 100644 index d37d020ed..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java +++ /dev/null @@ -1,52 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import javax.xml.ws.wsaddressing.W3CEndpointReference; - -import eu.dnetlib.data.mdstore.MDStoreService; -import eu.dnetlib.data.mdstore.MDStoreServiceException; -import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; -import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; - -/** - * WebService based MDStore facade. 
- * - * @author mhorst - * - */ -public class WebServiceMDStoreFacade extends AbstractResultSetAwareWebServiceFacade implements MDStoreFacade { - - - //------------------------ CONSTRUCTORS ------------------- - - /** - * @param serviceLocation MDStore webservice location - * @param serviceReadTimeout service read timeout - * @param serviceConnectionTimeout service connection timeout - * @param resultSetReadTimeout resultset read timeout - * @param resultSetConnectionTimeout result set connection timeout - * @param resultSetPageSize resultset page size - */ - public WebServiceMDStoreFacade(String serviceLocation, - long serviceReadTimeout, long serviceConnectionTimeout, - long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { - super(MDStoreService.class, serviceLocation, - serviceReadTimeout, serviceConnectionTimeout, - resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); - } - - //------------------------ LOGIC -------------------------- - - @Override - public Iterable deliverMDRecords(String mdStoreId) throws ServiceFacadeException { - try { - W3CEndpointReference eprResult = getService().deliverMDRecords(mdStoreId, null, null, null); - ResultSetClientFactory rsFactory = new ResultSetClientFactory( - getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout()); - rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); - return rsFactory.getClient(eprResult); - } catch (MDStoreServiceException e) { - throw new ServiceFacadeException("delivering records for md store " + mdStoreId + " failed!", e); - } - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java deleted file mode 100644 index 00bb0c3f7..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java +++ /dev/null @@ -1,45 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_SERVICE_LOCATION; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; - -import java.util.Map; - -import com.google.common.base.Preconditions; - -import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; - -/** - * WebService MDStore service facade factory. 
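`deliverMDRecords` does not materialize the mdstore content: the `W3CEndpointReference` returned by the service is wrapped by `ResultSetClientFactory` into an iterable that fetches one page of `resultSetPageSize` records at a time, with the configured timeouts applying to each page fetch. Consumers should therefore stream it in a single pass, for example:

```java
import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeException;

public class StreamingConsumer {

    // Records are pulled lazily page by page as the loop advances, so memory
    // stays bounded even for very large mdstores.
    static long countRecords(MDStoreFacade facade, String mdStoreId) throws ServiceFacadeException {
        long count = 0;
        for (String record : facade.deliverMDRecords(mdStoreId)) {
            count++; // process each record here rather than collecting the iterable
        }
        return count;
    }
}
```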
- * - * @author mhorst - * - */ -public class WebServiceMDStoreFacadeFactory implements ServiceFacadeFactory { - - - //------------------------ LOGIC -------------------------- - - @Override - public WebServiceMDStoreFacade instantiate(Map parameters) { - Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_SERVICE_LOCATION), - "unknown MDStore service location: no parameter provided: '%s'", IMPORT_MDSTORE_SERVICE_LOCATION); - - return new WebServiceMDStoreFacade(parameters.get(IMPORT_MDSTORE_SERVICE_LOCATION), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), - Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java deleted file mode 100644 index 6e1aee80b..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java +++ /dev/null @@ -1,52 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import javax.xml.ws.wsaddressing.W3CEndpointReference; - -import eu.dnetlib.data.objectstore.rmi.ObjectStoreService; -import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException; -import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; -import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl; - -/** - * WebService based ObjectStore facade. 
- * - * @author mhorst - * - */ -public class WebServiceObjectStoreFacade extends AbstractResultSetAwareWebServiceFacade implements ObjectStoreFacade { - - - //------------------------ CONSTRUCTORS ------------------- - - /** - * @param serviceLocation ObjectStore webservice location - * @param serviceReadTimeout service read timeout - * @param serviceConnectionTimeout service connection timeout - * @param resultSetReadTimeout resultset read timeout - * @param resultSetConnectionTimeout result set connection timeout - * @param resultSetPageSize resultset page size - */ - public WebServiceObjectStoreFacade(String serviceLocation, - long serviceReadTimeout, long serviceConnectionTimeout, - long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) { - super(ObjectStoreService.class, serviceLocation, - serviceReadTimeout, serviceConnectionTimeout, - resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize); - } - - //------------------------ LOGIC -------------------------- - - @Override - public Iterable deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException { - try { - W3CEndpointReference eprResult = getService().deliverObjects(objectStoreId, from, until); - ResultSetClientFactory rsFactory = new ResultSetClientFactory( - getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout()); - rsFactory.setServiceResolver(new JaxwsServiceResolverImpl()); - return rsFactory.getClient(eprResult); - } catch (ObjectStoreServiceException e) { - throw new ServiceFacadeException("delivering records for object store " + objectStoreId + " failed!", e); - } - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java deleted file mode 100644 index 9c77c4546..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java +++ /dev/null @@ -1,44 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.facade; - -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_CONTENT_OBJECT_STORE_LOC; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE; - -import java.util.Map; - -import com.google.common.base.Preconditions; - -import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; - -/** - * WebService ObjectStore facade factory. 
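Both bounds of `deliverObjects` are epoch milliseconds, so callers usually derive them from wall-clock time. A sketch fetching the metadata of objects created within the last 24 hours:

```java
import eu.dnetlib.dhp.wf.importer.facade.ObjectStoreFacade;
import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeException;

public class ObjectStoreRangeExample {

    static Iterable<String> lastDay(ObjectStoreFacade facade, String objectStoreId)
            throws ServiceFacadeException {
        long until = System.currentTimeMillis();
        long from = until - 24L * 60 * 60 * 1000; // 24 hours back, in millis
        return facade.deliverObjects(objectStoreId, from, until);
    }
}
```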
- * - * @author mhorst - * - */ -public class WebServiceObjectStoreFacadeFactory implements ServiceFacadeFactory { - - //------------------------ LOGIC -------------------------- - - @Override - public WebServiceObjectStoreFacade instantiate(Map parameters) { - Preconditions.checkArgument(parameters.containsKey(IMPORT_CONTENT_OBJECT_STORE_LOC), - "unknown object store service location: no parameter provided: '%s'", IMPORT_CONTENT_OBJECT_STORE_LOC); - - return new WebServiceObjectStoreFacade(parameters.get(IMPORT_CONTENT_OBJECT_STORE_LOC), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters)), - Long.parseLong(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters)), - Integer.parseInt(WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters))); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java deleted file mode 100644 index 5d61f06a5..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java +++ /dev/null @@ -1,94 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.mdrecord; - -import java.util.Stack; - -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -import eu.dnetlib.dhp.common.InfoSpaceConstants; - -/** - * MDRecord handler extracting record identifier. - * - * Notice: writer is not being closed by handler. Created outside, let it be closed outside as well. 
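The handler below keeps the first `objIdentifier` text found directly under a `header` element and prefixes it with `InfoSpaceConstants.ROW_PREFIX_RESULT`. A standalone usage sketch (the record body and identifier are fabricated):

```java
import java.io.StringReader;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;

import eu.dnetlib.dhp.wf.importer.mdrecord.MDRecordHandler;

public class RecordIdExtraction {

    public static void main(String[] args) throws Exception {
        String mdRecord = "<record><header>"
                + "<objIdentifier>od______1234::0001a50c6388e9bf</objIdentifier>"
                + "</header><metadata/></record>";

        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(true);
        SAXParser saxParser = parserFactory.newSAXParser();

        MDRecordHandler mdRecordHandler = new MDRecordHandler();
        saxParser.parse(new InputSource(new StringReader(mdRecord)), mdRecordHandler);
        // prints the identifier prefixed with InfoSpaceConstants.ROW_PREFIX_RESULT
        System.out.println(mdRecordHandler.getRecordId());
    }
}
```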
- * @author mhorst - * - */ -public class MDRecordHandler extends DefaultHandler { - - public static final String ELEM_OBJ_IDENTIFIER = "objIdentifier"; - - private static final String ELEM_HEADER = "header"; - - private Stack parents; - - private StringBuilder currentValue = new StringBuilder(); - - private String recordId; - - - // ------------------------ LOGIC -------------------------- - - @Override - public void startDocument() throws SAXException { - parents = new Stack(); - recordId = null; - } - - @Override - public void startElement(String uri, String localName, String qName, - Attributes attributes) throws SAXException { - if (this.recordId == null) { - if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) { -// identifierType attribute is mandatory - this.currentValue = new StringBuilder(); - } - this.parents.push(localName); - } - } - - @Override - public void endElement(String uri, String localName, String qName) - throws SAXException { - if (this.recordId == null) { - this.parents.pop(); - if (isWithinElement(localName, ELEM_OBJ_IDENTIFIER, ELEM_HEADER)) { - this.recordId = InfoSpaceConstants.ROW_PREFIX_RESULT + this.currentValue.toString().trim(); - } -// resetting current value; - this.currentValue = null; - } - } - - @Override - public void endDocument() throws SAXException { - parents.clear(); - parents = null; - } - - @Override - public void characters(char[] ch, int start, int length) - throws SAXException { - if (this.currentValue!=null) { - this.currentValue.append(ch, start, length); - } - } - - /** - * @return record identifier - */ - public String getRecordId() { - return recordId; - } - - // ------------------------ PRIVATE -------------------------- - - private boolean isWithinElement(String localName, String expectedElement, String expectedParent) { - return localName.equals(expectedElement) && !this.parents.isEmpty() && - expectedParent.equals(this.parents.peek()); - } - - -} - diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java deleted file mode 100644 index 461093851..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java +++ /dev/null @@ -1,157 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.mdrecord; - -import java.io.IOException; -import java.io.StringReader; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -import com.google.common.base.Preconditions; -import eu.dnetlib.dhp.common.WorkflowRuntimeParameters; -import eu.dnetlib.dhp.common.counter.NamedCounters; -import eu.dnetlib.dhp.common.counter.NamedCountersFileWriter; -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.Process; -import eu.dnetlib.dhp.common.java.io.DataStore; -import eu.dnetlib.dhp.common.java.io.FileSystemPath; -import eu.dnetlib.dhp.common.java.porttype.AvroPortType; -import eu.dnetlib.dhp.common.java.porttype.PortType; -import eu.dnetlib.dhp.importer.schemas.ImportedRecord; -import eu.dnetlib.dhp.importer.schemas.RecordFormat; -import eu.dnetlib.dhp.wf.importer.facade.MDStoreFacade; -import eu.dnetlib.dhp.wf.importer.facade.ServiceFacadeUtils; -import org.apache.avro.file.DataFileWriter; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import 
org.apache.hadoop.fs.FileSystem; -import org.apache.log4j.Logger; -import org.xml.sax.InputSource; - -import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_IDS_CSV; -import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_MDSTORE_RECORD_MAXLENGTH; - -/** - * {@link MDStoreFacade} based metadata records importer. - * @author mhorst - * - */ -public class MDStoreRecordsImporter implements Process { - - protected static final String COUNTER_NAME_TOTAL = "TOTAL"; - - protected static final String COUNTER_NAME_SIZE_EXCEEDED = "SIZE_EXCEEDED"; - - protected static final String PORT_OUT_MDRECORDS = "mdrecords"; - - private static final Logger log = Logger.getLogger(MDStoreRecordsImporter.class); - - private final static int progressLogInterval = 100000; - - private final NamedCountersFileWriter countersWriter = new NamedCountersFileWriter(); - - private final Map outputPorts = new HashMap(); - - - //------------------------ CONSTRUCTORS ------------------- - - public MDStoreRecordsImporter() { - outputPorts.put(PORT_OUT_MDRECORDS, new AvroPortType(ImportedRecord.SCHEMA$)); - } - - //------------------------ LOGIC -------------------------- - - @Override - public Map getInputPorts() { - return Collections.emptyMap(); - } - - @Override - public Map getOutputPorts() { - return outputPorts; - } - - @Override - public void run(PortBindings portBindings, Configuration conf, - Map parameters) throws Exception { - - Preconditions.checkArgument(parameters.containsKey(IMPORT_MDSTORE_IDS_CSV), - "unknown mdstore identifier, required parameter '%s' is missing!", IMPORT_MDSTORE_IDS_CSV); - String mdStoreIdsCSV = parameters.get(IMPORT_MDSTORE_IDS_CSV); - int recordMaxLength = parameters.containsKey(IMPORT_MDSTORE_RECORD_MAXLENGTH)? 
- Integer.parseInt(parameters.get(IMPORT_MDSTORE_RECORD_MAXLENGTH)):Integer.MAX_VALUE; - - NamedCounters counters = new NamedCounters(new String[] { COUNTER_NAME_TOTAL, COUNTER_NAME_SIZE_EXCEEDED }); - - if (StringUtils.isNotBlank(mdStoreIdsCSV) && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(mdStoreIdsCSV)) { - - String[] mdStoreIds = StringUtils.split(mdStoreIdsCSV, WorkflowRuntimeParameters.DEFAULT_CSV_DELIMITER); - - try (DataFileWriter recordWriter = getWriter(FileSystem.get(conf), portBindings)) { - - MDStoreFacade mdStoreFacade = ServiceFacadeUtils.instantiate(parameters); - - SAXParserFactory parserFactory = SAXParserFactory.newInstance(); - parserFactory.setNamespaceAware(true); - SAXParser saxParser = parserFactory.newSAXParser(); - MDRecordHandler mdRecordHandler = new MDRecordHandler(); - - long startTime = System.currentTimeMillis(); - int currentCount = 0; - - for (String mdStoreId : mdStoreIds) { - for (String mdRecord : mdStoreFacade.deliverMDRecords(mdStoreId)) { - if (!StringUtils.isEmpty(mdRecord)) { - if (mdRecord.length() <= recordMaxLength) { - saxParser.parse(new InputSource(new StringReader(mdRecord)), mdRecordHandler); - String recordId = mdRecordHandler.getRecordId(); - if (StringUtils.isNotBlank(recordId)) { - recordWriter.append( - ImportedRecord.newBuilder() - .setId(recordId) - .setBody(mdRecord) - .setFormat(RecordFormat.XML) - .build()); - counters.increment(COUNTER_NAME_TOTAL); - } else { - log.error("skipping, unable to extract identifier from record: " + mdRecord); - } - } else { - counters.increment(COUNTER_NAME_SIZE_EXCEEDED); - log.error("mdstore record maximum length (" + recordMaxLength + "): was exceeded: " - + mdRecord.length() + ", record content:\n" + mdRecord); - } - - } else { - log.error("got empty metadata record from mdstore: " + mdStoreId); - } - currentCount++; - if (currentCount % progressLogInterval == 0) { - log.info("current progress: " + currentCount + ", last package of " + progressLogInterval - + " processed in " + ((System.currentTimeMillis() - startTime) / 1000) + " secs"); - startTime = System.currentTimeMillis(); - } - } - } - log.info("total number of processed records: " + currentCount); - } - } - - if (counters.currentValue(COUNTER_NAME_TOTAL)==0) { - log.warn("parsed 0 metadata records from mdstores: " + mdStoreIdsCSV); - } - countersWriter.writeCounters(counters, System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)); - - } - - /** - * Provides {@link ImportedRecord} writer consuming records. 
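The TOTAL and SIZE_EXCEEDED counters incremented in the loop above are flushed to the file named by the `oozie.action.output.properties` system property (assuming that is the value behind `OOZIE_ACTION_OUTPUT_FILENAME`), which is what lets the downstream report action read them via `wf:actionData(...)`. The flow in isolation:

```java
import eu.dnetlib.dhp.common.counter.NamedCounters;
import eu.dnetlib.dhp.common.counter.NamedCountersFileWriter;

// Counter flow sketch: only meaningful when launched by the Oozie java action,
// which sets the output property file that writeCounters(...) targets.
public class CounterFlowSketch {

    public static void main(String[] args) throws Exception {
        NamedCounters counters = new NamedCounters(new String[] { "TOTAL", "SIZE_EXCEEDED" });
        counters.increment("TOTAL");
        counters.increment("TOTAL");
        counters.increment("SIZE_EXCEEDED");
        new NamedCountersFileWriter().writeCounters(counters,
                System.getProperty("oozie.action.output.properties"));
    }
}
```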
- */ - protected DataFileWriter getWriter(FileSystem fs, PortBindings portBindings) throws IOException { - return DataStore.create( - new FileSystemPath(fs, portBindings.getOutput().get(PORT_OUT_MDRECORDS)), ImportedRecord.SCHEMA$); - } - -} diff --git a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java b/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java deleted file mode 100644 index 4776af22b..000000000 --- a/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java +++ /dev/null @@ -1,48 +0,0 @@ -package eu.dnetlib.dhp.wf.importer.mdrecord; - -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; - -import eu.dnetlib.dhp.common.java.PortBindings; -import eu.dnetlib.dhp.common.java.Process; -import eu.dnetlib.dhp.common.java.porttype.PortType; -import org.apache.hadoop.conf.Configuration; - -public class MongoRecordImporter implements Process { - - private final Map outputPorts = new HashMap(); - - @Override - public Map getInputPorts() { - return Collections.emptyMap(); - } - - @Override - public Map getOutputPorts() { - return outputPorts; - } - - @Override - public void run(final PortBindings portBindings, final Configuration conf, final Map parameters) throws Exception { - - /* - SparkSession spark = SparkSession.builder() - .master("local") - .appName("MongoSparkConnectorIntro") - .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.myCollection") - .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.myCollection") - .getOrCreate(); - - // Create a JavaSparkContext using the SparkSession's SparkContext object - JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); - - // More application logic would go here... 
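// A plausible continuation at this point, sketched under the assumption that the
// MongoSpark API of the mongo-spark-connector_2.10 2.2.1 artifact (declared in the
// root pom below) would be used; not part of the original file:
//
// com.mongodb.spark.rdd.api.java.JavaMongoRDD<org.bson.Document> mongoRdd =
//         com.mongodb.spark.MongoSpark.load(jsc);
// System.out.println("documents available for import: " + mongoRdd.count());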
- - jsc.close(); - */ - - } - - -} diff --git a/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml deleted file mode 100644 index dee61af91..000000000 --- a/dhp-wf/dhp-wf-import/src/main/resources/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml +++ /dev/null @@ -1,124 +0,0 @@ - - - - - mdstore_facade_factory_classname - eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory - ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade - - - mdstore_service_location - $UNDEFINED$ - mdstore service location - - - mdstore_ids_csv - $UNDEFINED$ - comma separated mdstore identifiers - - - mdstore_record_maxlength - 500000 - maximum allowed length of mdstore record - - - output - ImportedRecord avro datastore output holding mdrecords - - - output_report_root_path - base directory for storing reports - - - output_report_relative_path - import_mdrecord - directory for storing report (relative to output_report_root_path) - - - dnet_service_client_read_timeout - 60000 - DNet service client reading timeout (expressed in milliseconds) - - - dnet_service_client_connection_timeout - 60000 - DNet service client connection timeout (expressed in milliseconds) - - - resultset_client_read_timeout - 60000 - result set client reading timeout (expressed in milliseconds) - - - resultset_client_connection_timeout - 60000 - result set client connection timeout (expressed in milliseconds) - - - report_properties_prefix - import.mdrecord - report entry related to total number of imported records - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - - - - - - - - - - - eu.dnetlib.dhp.common.java.ProcessWrapper - eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter - - -Pimport.mdstore.service.location=${mdstore_service_location} - -Pimport.mdstore.ids.csv=${mdstore_ids_csv} - -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength} - - -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout} - -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout} - -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout} - -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout} - - -Pimport.facade.factory.classname=${mdstore_facade_factory_classname} - -Omdrecords=${output} - - - - - - - - - eu.dnetlib.dhp.common.java.ProcessWrapper - eu.dnetlib.dhp.common.report.ReportGenerator - -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']} - -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']} - -Oreport=${output_report_root_path}/${output_report_relative_path} - - - - - - - Unfortunately, the process failed -- error message: - [${wf:errorMessage(wf:lastErrorNode())}] - - - diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class deleted file mode 100644 index 5bd02d360..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class and /dev/null differ diff --git 
a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class deleted file mode 100644 index 46f329580..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class deleted file mode 100644 index 13a46a67a..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class deleted file mode 100644 index fb8394cb5..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/RecordReceiver.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class deleted file mode 100644 index abb39ad86..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class deleted file mode 100644 index 7689db9d6..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class deleted file mode 100644 index ec0403005..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class deleted file mode 100644 index e9e7a2cbd..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class deleted file mode 100644 index 03ce390c2..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class deleted file mode 100644 index 18e211bf9..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class and /dev/null differ diff --git 
a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class deleted file mode 100644 index d7663b0dd..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class deleted file mode 100644 index aff40623b..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class deleted file mode 100644 index c9ac13238..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class deleted file mode 100644 index cbd0403cb..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class deleted file mode 100644 index b7c9aaee6..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class deleted file mode 100644 index 8ffb1d81b..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class deleted file mode 100644 index b6a1ceca9..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class deleted file mode 100644 index 092c87c70..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class deleted file mode 100644 index 254a7f647..000000000 Binary files 
a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class deleted file mode 100644 index 5079c0385..000000000 Binary files a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class and /dev/null differ diff --git a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml b/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml deleted file mode 100644 index dee61af91..000000000 --- a/dhp-wf/dhp-wf-import/target/classes/eu/dnetlib/dhp/wf/importer/mdrecord/oozie_app/workflow.xml +++ /dev/null @@ -1,124 +0,0 @@ - - - - - mdstore_facade_factory_classname - eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory - ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade - - - mdstore_service_location - $UNDEFINED$ - mdstore service location - - - mdstore_ids_csv - $UNDEFINED$ - comma separated mdstore identifiers - - - mdstore_record_maxlength - 500000 - maximum allowed length of mdstore record - - - output - ImportedRecord avro datastore output holding mdrecords - - - output_report_root_path - base directory for storing reports - - - output_report_relative_path - import_mdrecord - directory for storing report (relative to output_report_root_path) - - - dnet_service_client_read_timeout - 60000 - DNet service client reading timeout (expressed in milliseconds) - - - dnet_service_client_connection_timeout - 60000 - DNet service client connection timeout (expressed in milliseconds) - - - resultset_client_read_timeout - 60000 - result set client reading timeout (expressed in milliseconds) - - - resultset_client_connection_timeout - 60000 - result set client connection timeout (expressed in milliseconds) - - - report_properties_prefix - import.mdrecord - report entry related to total number of imported records - - - - - ${jobTracker} - ${nameNode} - - - mapreduce.job.queuename - ${queueName} - - - oozie.launcher.mapred.job.queue.name - ${oozieLauncherQueueName} - - - - - - - - - - - - - eu.dnetlib.dhp.common.java.ProcessWrapper - eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter - - -Pimport.mdstore.service.location=${mdstore_service_location} - -Pimport.mdstore.ids.csv=${mdstore_ids_csv} - -Pimport.mdstore.record.maxlength=${mdstore_record_maxlength} - - -Pimport.resultset.client.read.timeout=${resultset_client_read_timeout} - -Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout} - -Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout} - -Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout} - - -Pimport.facade.factory.classname=${mdstore_facade_factory_classname} - -Omdrecords=${output} - - - - - - - - - eu.dnetlib.dhp.common.java.ProcessWrapper - eu.dnetlib.dhp.common.report.ReportGenerator - -Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']} - -Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']} - -Oreport=${output_report_root_path}/${output_report_relative_path} - - - - - - - Unfortunately, the process failed -- error message: - 
[${wf:errorMessage(wf:lastErrorNode())}] - - - diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst deleted file mode 100644 index 672248f22..000000000 --- a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst +++ /dev/null @@ -1,20 +0,0 @@ -eu/dnetlib/dhp/wf/importer/RecordReceiver.class -eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.class -eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.class -eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.class -eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.class -eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.class -eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.class -eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.class -eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.class -eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.class -eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.class -eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.class -eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.class -eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.class -eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.class -eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.class -eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.class -eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.class -eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.class -eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.class diff --git a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst deleted file mode 100644 index e9820d1d9..000000000 --- a/dhp-wf/dhp-wf-import/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst +++ /dev/null @@ -1,20 +0,0 @@ -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ISLookupFacade.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/RecordReceiver.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDRecordHandler.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MDStoreRecordsImporter.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacade.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacade.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/MDStoreFacade.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiver.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeException.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/AbstractResultSetAwareWebServiceFacade.java 
-/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/DataFileRecordReceiverWithCounter.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceMDStoreFacadeFactory.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ObjectStoreFacade.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacadeFactory.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceObjectStoreFacade.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/ImportWorkflowRuntimeParameters.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeUtils.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/mdrecord/MongoRecordImporter.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/WebServiceISLookupFacadeFactory.java -/Users/claudio/workspace/dnet-hadoop/dhp-wf/dhp-wf-import/src/main/java/eu/dnetlib/dhp/wf/importer/facade/ServiceFacadeFactory.java diff --git a/dhp-wf/pom.xml b/dhp-wf/pom.xml index 4c0aa666a..9444f0d4a 100644 --- a/dhp-wf/pom.xml +++ b/dhp-wf/pom.xml @@ -14,236 +14,9 @@ pom - dhp-wf-import + - - yyyy-MM-dd_HH_mm - - - oozie-package - - src/test/resources/define/path/pointing/to/directory/holding/oozie_app - oozie_app - default - default - default - primed - runtime - - true - - ${user.home}/.dhp/application.properties - - ${maven.build.timestamp} - - ${project.version} - true - - - - - - - org.apache.oozie - oozie-client - - - net.schmizz - sshj - test - - - - - - oozie-package - - - - org.apache.maven.plugins - maven-enforcer-plugin - 1.4.1 - - - enforce-connection-properties-file-existence - initialize - - enforce - - - - - - ${dhpConnectionProperties} - - - The file with connection properties could not be found. Please, create the ${dhpConnectionProperties} file or set the location to another already created file by using - -DdhpConnectionProperties property. 
-                </rules>
-                <failFast>true</failFast>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-dependency-plugin</artifactId>
-          <executions>
-            <execution>
-              <id>copy dependencies</id>
-              <phase>prepare-package</phase>
-              <goals><goal>copy-dependencies</goal></goals>
-              <configuration>
-                <includeScope>${oozie.package.dependencies.include.scope}</includeScope>
-                <excludeScope>${oozie.package.dependencies.exclude.scope}</excludeScope>
-                <silent>true</silent>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-jar-plugin</artifactId>
-          <executions>
-            <execution>
-              <id>attach-test-resources-package</id>
-              <phase>prepare-package</phase>
-              <goals><goal>test-jar</goal></goals>
-              <configuration>
-                <skip>${oozie.package.skip.test.jar}</skip>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>pl.project13.maven</groupId>
-          <artifactId>git-commit-id-plugin</artifactId>
-          <version>2.1.11</version>
-          <executions>
-            <execution>
-              <goals><goal>revision</goal></goals>
-            </execution>
-          </executions>
-          <configuration>
-            <verbose>true</verbose>
-            <dateFormat>yyyy-MM-dd'T'HH:mm:ssZ</dateFormat>
-            <generateGitPropertiesFile>true</generateGitPropertiesFile>
-            <generateGitPropertiesFilename>target/${oozie.package.file.name}/${oozieAppDir}/version.properties</generateGitPropertiesFilename>
-          </configuration>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-assembly-plugin</artifactId>
-          <version>3.0.0</version>
-          <executions>
-            <execution>
-              <id>assembly-oozie-installer</id>
-              <phase>package</phase>
-              <goals><goal>single</goal></goals>
-              <configuration>
-                <appendAssemblyId>false</appendAssemblyId>
-                <finalName>${oozie.package.file.name}_shell_scripts</finalName>
-                <descriptorRefs>
-                  <descriptorRef>oozie-installer</descriptorRef>
-                </descriptorRefs>
-              </configuration>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <artifactId>maven-antrun-plugin</artifactId>
-          <executions>
-            <execution>
-              <id>installer-copy-custom</id>
-              <phase>process-resources</phase>
-              <goals><goal>run</goal></goals>
-              <configuration>
-                <target>
-
-                </target>
-              </configuration>
-            </execution>
-            <execution>
-              <phase>package</phase>
-              <configuration>
-                <target>
-
-
-                </target>
-              </configuration>
-              <goals><goal>run</goal></goals>
-            </execution>
-          </executions>
-        </plugin>
-      </plugins>
-    </build>
-  </profile>
- </profiles>
-</project>
diff --git a/pom.xml b/pom.xml
index 8861cbe25..731bfe984 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,953 +1,245 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <groupId>eu.dnetlib.dhp</groupId>
-  <artifactId>dhp</artifactId>
-  <version>1.0.0-SNAPSHOT</version>
-  <packaging>pom</packaging>
-  <url>http://www.d-net.research-infrastructures.eu</url>
-  <licenses>
-    <license>
-      <name>The Apache Software License, Version 2.0</name>
-      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
-      <distribution>repo</distribution>
-      <comments>A business-friendly OSS license</comments>
-    </license>
-  </licenses>
-  <modules>
-    <module>dhp-build</module>
-    <module>dhp-common</module>
-    <module>dhp-schemas</module>
-    <module>dhp-wf</module>
-  </modules>
-  <issueManagement>
-    <system>Redmine</system>
-    <url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url>
-  </issueManagement>
-  <ciManagement>
-    <system>jenkins</system>
-    <url>https://jenkins-dnet.d4science.org/</url>
-  </ciManagement>
-  <scm>
-    <connection>scm:git:ssh://git@github.com/???</connection>
-    <developerConnection>scm:git:ssh://git@github.com/???.git</developerConnection>
-    <url>https://github.com/???</url>
-    <tag>HEAD</tag>
-  </scm>
-  <repositories>
-    <repository>
-      <id>dnet45-releases</id>
-      <name>D-Net 45 releases</name>
-      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
-      <layout>default</layout>
-      <snapshots><enabled>false</enabled></snapshots>
-      <releases><enabled>true</enabled></releases>
-    </repository>
-    <repository>
-      <id>dnet45-bootstrap-release</id>
-      <name>dnet45 bootstrap release</name>
-      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-bootstrap-release</url>
-      <layout>default</layout>
-      <snapshots><enabled>false</enabled></snapshots>
-      <releases><enabled>true</enabled></releases>
-    </repository>
-    <repository>
-      <id>cloudera</id>
-      <name>Cloudera Repository</name>
-      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
-      <releases><enabled>true</enabled></releases>
-      <snapshots><enabled>false</enabled></snapshots>
-    </repository>
-  </repositories>
-  <dependencies>
-    <dependency><groupId>org.slf4j</groupId><artifactId>slf4j-api</artifactId><version>1.7.22</version></dependency>
-    <dependency><groupId>org.slf4j</groupId><artifactId>slf4j-log4j12</artifactId><version>1.7.22</version></dependency>
-    <dependency><groupId>log4j</groupId><artifactId>log4j</artifactId><version>1.2.17</version></dependency>
-    <dependency><groupId>javax.servlet</groupId><artifactId>javax.servlet-api</artifactId><version>3.1.0</version><scope>runtime</scope></dependency>
-    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.12</version><scope>test</scope></dependency>
-    <dependency><groupId>org.hamcrest</groupId><artifactId>hamcrest-all</artifactId><version>1.3</version><scope>test</scope></dependency>
-    <dependency><groupId>org.mockito</groupId><artifactId>mockito-all</artifactId><version>1.10.19</version><scope>test</scope></dependency>
-    <dependency><groupId>org.powermock</groupId><artifactId>powermock-core</artifactId><version>1.6.6</version><scope>test</scope></dependency>
-    <dependency><groupId>com.google.code.findbugs</groupId><artifactId>annotations</artifactId><version>3.0.1</version><scope>provided</scope></dependency>
-    <dependency><groupId>com.google.code.findbugs</groupId><artifactId>jsr305</artifactId><version>3.0.1</version><scope>provided</scope></dependency>
-  </dependencies>
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-common</artifactId>
-        <version>${dhp.hadoop.version}</version>
-        <scope>provided</scope>
-        <exclusions>
-          <exclusion><artifactId>servlet-api</artifactId><groupId>javax.servlet</groupId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-yarn-common</artifactId><version>${dhp.hadoop.version}</version><scope>provided</scope></dependency>
-      <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-common</artifactId><version>${dhp.hadoop.version}</version><scope>provided</scope></dependency>
-      <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-yarn-common</artifactId><version>${dhp.hadoop.version}</version><scope>test</scope><type>test-jar</type></dependency>
-      <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-common</artifactId><version>${dhp.hadoop.version}</version><scope>test</scope><type>test-jar</type></dependency>
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-mapreduce-client-app</artifactId>
-        <version>${dhp.hadoop.version}</version>
-        <scope>provided</scope>
-        <exclusions>
-          <exclusion><artifactId>servlet-api</artifactId><groupId>javax.servlet</groupId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-mapreduce-client-core</artifactId>
-        <version>${dhp.hadoop.version}</version>
-        <scope>provided</scope>
-        <exclusions>
-          <exclusion><artifactId>servlet-api</artifactId><groupId>javax.servlet</groupId></exclusion>
-          <exclusion><groupId>org.apache.calcite</groupId><artifactId>calcite-core</artifactId></exclusion>
-          <exclusion><groupId>org.apache.calcite</groupId><artifactId>calcite-avatica</artifactId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>${dhp.hadoop.version}</version><scope>provided</scope></dependency>
-      <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>${dhp.hadoop.version}</version><type>test-jar</type><scope>test</scope></dependency>
-      <dependency>
-        <groupId>org.apache.oozie</groupId>
-        <artifactId>oozie-core</artifactId>
-        <version>${dhp.oozie.version}</version>
-        <exclusions>
-          <exclusion><artifactId>servlet-api</artifactId><groupId>javax.servlet</groupId></exclusion>
-          <exclusion><groupId>org.apache.calcite</groupId><artifactId>calcite-core</artifactId></exclusion>
-          <exclusion><groupId>org.apache.calcite</groupId><artifactId>calcite-avatica</artifactId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.oozie</groupId>
-        <artifactId>oozie-client</artifactId>
-        <version>${dhp.oozie.version}</version>
-        <exclusions>
-          <exclusion><artifactId>slf4j-simple</artifactId><groupId>org.slf4j</groupId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency><groupId>org.apache.spark</groupId><artifactId>spark-core_2.10</artifactId><version>${dhp.spark.version}</version><scope>provided</scope></dependency>
-      <dependency><groupId>org.apache.spark</groupId><artifactId>spark-sql_2.10</artifactId><version>${dhp.spark.version}</version><scope>provided</scope></dependency>
-      <dependency><groupId>com.databricks</groupId><artifactId>spark-avro_2.10</artifactId><version>1.1.0-${dhp.cdh.version}</version></dependency>
-      <dependency><groupId>com.databricks</groupId><artifactId>spark-csv_2.10</artifactId><version>1.5.0</version></dependency>
-      <dependency><groupId>pl.edu.icm.spark-utils</groupId><artifactId>spark-utils</artifactId><version>1.0.0</version></dependency>
-      <dependency><groupId>org.mongodb.spark</groupId><artifactId>mongo-spark-connector_2.10</artifactId><version>2.2.1</version><scope>provided</scope></dependency>
-      <dependency><groupId>org.apache.avro</groupId><artifactId>avro</artifactId><version>${dhp.avro.version}</version></dependency>
-      <dependency>
-        <groupId>org.apache.avro</groupId>
-        <artifactId>avro-mapred</artifactId>
-        <version>${dhp.avro.version}</version>
-        <classifier>hadoop2</classifier>
-        <exclusions>
-          <exclusion><artifactId>servlet-api</artifactId><groupId>org.mortbay.jetty</groupId></exclusion>
-          <exclusion><artifactId>netty</artifactId><groupId>io.netty</groupId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>org.apache.pig</groupId>
-        <artifactId>pig</artifactId>
-        <version>${dhp.pig.version}</version>
-        <scope>provided</scope>
-        <exclusions>
-          <exclusion><groupId>org.mortbay.jetty</groupId><artifactId>servlet-api-2.5</artifactId></exclusion>
-          <exclusion><artifactId>servlet-api</artifactId><groupId>javax.servlet</groupId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency><groupId>org.apache.pig</groupId><artifactId>piggybank</artifactId><version>${dhp.pig.version}</version><scope>provided</scope></dependency>
-      <dependency><groupId>org.apache.pig</groupId><artifactId>pigunit</artifactId><version>${dhp.pig.version}</version></dependency>
-      <dependency><groupId>pl.edu.icm.cermine</groupId><artifactId>cermine-impl</artifactId><version>1.13</version></dependency>
-      <dependency><groupId>pl.edu.icm.coansys</groupId><artifactId>models</artifactId><version>${dhp.coansys.version}</version></dependency>
-      <dependency>
-        <groupId>pl.edu.icm.coansys</groupId>
-        <artifactId>citation-matching-core-code</artifactId>
-        <version>${dhp.coansys.version}</version>
-        <exclusions>
-          <exclusion><groupId>org.apache.avro</groupId><artifactId>avro-mapred</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-yarn-api</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-yarn-client</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-common</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-app</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-mapreduce-client-core</artifactId></exclusion>
-          <exclusion><groupId>org.slf4j</groupId><artifactId>slf4j-simple</artifactId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>eu.dnetlib</groupId>
-        <artifactId>dnet-openaireplus-mapping-utils</artifactId>
-        <version>[6.0.0, 7.0.0)</version>
-        <exclusions>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-core</artifactId></exclusion>
-          <exclusion><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId></exclusion>
-          <exclusion><groupId>org.apache.zookeeper</groupId><artifactId>zookeeper</artifactId></exclusion>
-          <exclusion><groupId>jgrapht</groupId><artifactId>jgrapht</artifactId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency><groupId>eu.dnetlib</groupId><artifactId>dnet-objectstore-rmi</artifactId><version>[2.0.0, 3.0.0)</version></dependency>
-      <dependency><groupId>eu.dnetlib</groupId><artifactId>cnr-rmi-api</artifactId><version>[2.0.0, 3.0.0)</version></dependency>
-      <dependency>
-        <groupId>eu.dnetlib</groupId>
-        <artifactId>cnr-resultset-client</artifactId>
-        <version>[2.0.0, 3.0.0)</version>
-        <exclusions>
-          <exclusion><groupId>org.springframework</groupId><artifactId>spring-web</artifactId></exclusion>
-          <exclusion><groupId>org.springframework</groupId><artifactId>spring-webmvc</artifactId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency>
-        <groupId>eu.dnetlib</groupId>
-        <artifactId>dnet-actionmanager-common</artifactId>
-        <version>[6.0.0, 7.0.0)</version>
-        <exclusions>
-          <exclusion><groupId>apache</groupId><artifactId>commons-logging</artifactId></exclusion>
-        </exclusions>
-      </dependency>
-      <dependency><groupId>eu.dnetlib</groupId><artifactId>cnr-service-utils</artifactId><version>[1.0.0, 2.0.0)</version></dependency>
-      <dependency><groupId>com.beust</groupId><artifactId>jcommander</artifactId><version>1.60</version></dependency>
-      <dependency><groupId>com.google.code.gson</groupId><artifactId>gson</artifactId><version>2.8.0</version></dependency>
-      <dependency><groupId>com.googlecode.json-simple</groupId><artifactId>json-simple</artifactId><version>1.1.1</version></dependency>
-      <dependency><groupId>org.apache.commons</groupId><artifactId>commons-lang3</artifactId><version>3.5</version></dependency>
-      <dependency><groupId>org.apache.commons</groupId><artifactId>commons-collections4</artifactId><version>4.1</version></dependency>
-      <dependency><groupId>com.thoughtworks.xstream</groupId><artifactId>xstream</artifactId><version>1.4.9</version></dependency>
-      <dependency><groupId>xalan</groupId><artifactId>xalan</artifactId><version>2.7.2</version></dependency>
-      <dependency><groupId>xml-apis</groupId><artifactId>xml-apis</artifactId><version>1.4.01</version></dependency>
-      <dependency><groupId>org.jdom</groupId><artifactId>jdom</artifactId><version>1.1.3</version></dependency>
-      <dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.10.2</version></dependency>
-      <dependency><groupId>net.sf.opencsv</groupId><artifactId>opencsv</artifactId><version>2.3</version></dependency>
-      <dependency><groupId>com.googlecode.protobuf-java-format</groupId><artifactId>protobuf-java-format</artifactId><version>1.2</version></dependency>
-      <dependency><groupId>com.google.protobuf</groupId><artifactId>protobuf-java</artifactId><version>2.5.0</version></dependency>
-      <dependency><groupId>com.google.guava</groupId><artifactId>guava</artifactId><version>12.0</version></dependency>
-      <dependency><groupId>commons-cli</groupId><artifactId>commons-cli</artifactId><version>1.3.1</version></dependency>
-      <dependency><groupId>commons-io</groupId><artifactId>commons-io</artifactId><version>2.5</version></dependency>
-      <dependency><groupId>de.sven-jacobs</groupId><artifactId>loremipsum</artifactId><version>1.0</version></dependency>
-      <dependency><groupId>net.schmizz</groupId><artifactId>sshj</artifactId><version>0.10.0</version></dependency>
-      <dependency><groupId>org.bouncycastle</groupId><artifactId>bcprov-jdk15on</artifactId><version>1.50</version></dependency>
-      <dependency><groupId>com.thoughtworks.paranamer</groupId><artifactId>paranamer</artifactId><version>2.8</version></dependency>
-      <dependency><groupId>com.linkedin.datafu</groupId><artifactId>datafu</artifactId><version>1.2.0</version></dependency>
-      <dependency><groupId>org.apache.cxf</groupId><artifactId>cxf-rt-frontend-jaxws</artifactId><version>${cxf.version}</version></dependency>
-      <dependency><groupId>com.sun.xml.bind</groupId><artifactId>jaxb-impl</artifactId><version>2.2.7</version><scope>runtime</scope></dependency>
-      <dependency><groupId>org.springframework</groupId><artifactId>spring-beans</artifactId><version>${spring.version}</version></dependency>
-      <dependency><groupId>org.springframework</groupId><artifactId>spring-context</artifactId><version>${spring.version}</version></dependency>
-      <dependency><groupId>org.scala-lang</groupId><artifactId>scala-library</artifactId><version>${scala.version}</version></dependency>
-      <dependency><groupId>commons-beanutils</groupId><artifactId>commons-beanutils</artifactId><version>1.9.3</version></dependency>
-      <dependency><groupId>org.apache.curator</groupId><artifactId>curator-test</artifactId><version>3.3.0</version><scope>test</scope></dependency>
-    </dependencies>
-  </dependencyManagement>
-  <build>
-    <directory>target</directory>
-    <outputDirectory>target/classes</outputDirectory>
-    <finalName>${project.artifactId}-${project.version}</finalName>
-    <testOutputDirectory>target/test-classes</testOutputDirectory>
-    <pluginManagement>
-      <plugins>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-compiler-plugin</artifactId>
-          <version>3.6.0</version>
-          <configuration>
-            <source>1.8</source>
-            <target>1.8</target>
-            <encoding>${project.build.sourceEncoding}</encoding>
-          </configuration>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-jar-plugin</artifactId>
-          <version>3.0.2</version>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-source-plugin</artifactId>
-          <version>3.0.1</version>
-          <executions>
-            <execution>
-              <id>attach-sources</id>
-              <phase>verify</phase>
-              <goals><goal>jar-no-fork</goal></goals>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-surefire-plugin</artifactId>
-          <version>2.19.1</version>
-          <configuration>
-            <redirectTestOutputToFile>true</redirectTestOutputToFile>
-          </configuration>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-javadoc-plugin</artifactId>
-          <version>2.10.4</version>
-          <configuration>
-            <detectLinks>true</detectLinks>
-          </configuration>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-dependency-plugin</artifactId>
-          <version>3.0.0</version>
-        </plugin>
-        <plugin>
-          <groupId>net.alchim31.maven</groupId>
-          <artifactId>scala-maven-plugin</artifactId>
-          <version>3.2.2</version>
-          <executions>
-            <execution>
-              <id>scala-compile-first</id>
-              <phase>process-resources</phase>
-              <goals><goal>add-source</goal><goal>compile</goal></goals>
-            </execution>
-            <execution>
-              <id>scala-test-compile</id>
-              <phase>process-test-resources</phase>
-              <goals><goal>testCompile</goal></goals>
-            </execution>
-          </executions>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro-maven-plugin</artifactId>
-          <version>1.7.7</version>
-        </plugin>
-        <plugin>
-          <groupId>org.codehaus.mojo</groupId>
-          <artifactId>build-helper-maven-plugin</artifactId>
-          <version>1.12</version>
-        </plugin>
-        <plugin>
-          <groupId>org.eclipse.m2e</groupId>
-          <artifactId>lifecycle-mapping</artifactId>
-          <version>1.0.0</version>
-          <configuration>
-            <lifecycleMappingMetadata>
-              <pluginExecutions>
-                <pluginExecution>
-                  <pluginExecutionFilter>
-                    <groupId>org.apache.avro</groupId>
-                    <artifactId>avro-maven-plugin</artifactId>
-                    <versionRange>[1.7.4,)</versionRange>
-                    <goals><goal>idl-protocol</goal><goal>schema</goal></goals>
-                  </pluginExecutionFilter>
-                  <action><ignore /></action>
-                </pluginExecution>
-                <pluginExecution>
-                  <pluginExecutionFilter>
-                    <groupId>org.codehaus.mojo</groupId>
-                    <artifactId>build-helper-maven-plugin</artifactId>
-                    <versionRange>[1.7,)</versionRange>
-                    <goals><goal>add-source</goal></goals>
-                  </pluginExecutionFilter>
-                  <action><ignore /></action>
-                </pluginExecution>
-                <pluginExecution>
-                  <pluginExecutionFilter>
-                    <groupId>org.apache.maven.plugins</groupId>
-                    <artifactId>maven-plugin-plugin</artifactId>
-                    <versionRange>[3.2,)</versionRange>
-                    <goals><goal>descriptor</goal></goals>
-                  </pluginExecutionFilter>
-                  <action><ignore /></action>
-                </pluginExecution>
-              </pluginExecutions>
-            </lifecycleMappingMetadata>
-          </configuration>
-        </plugin>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-release-plugin</artifactId>
-          <version>2.5.3</version>
-        </plugin>
-        <plugin>
-          <groupId>org.jacoco</groupId>
-          <artifactId>jacoco-maven-plugin</artifactId>
-          <version>0.7.9</version>
-          <configuration>
-            <excludes>
-              <exclude>**/schemas/*</exclude>
-              <exclude>**/com/cloudera/**/*</exclude>
-              <exclude>**/org/apache/avro/io/**/*</exclude>
-            </excludes>
-          </configuration>
-          <executions>
-            <execution>
-              <id>default-prepare-agent</id>
-              <goals><goal>prepare-agent</goal></goals>
-            </execution>
-            <execution>
-              <id>default-report</id>
-              <phase>prepare-package</phase>
-              <goals><goal>report</goal></goals>
-            </execution>
-          </executions>
-        </plugin>
-      </plugins>
-    </pluginManagement>
-    <extensions>
-      <extension>
-        <groupId>org.apache.maven.wagon</groupId>
-        <artifactId>wagon-ssh</artifactId>
-        <version>2.10</version>
-      </extension>
-    </extensions>
-  </build>
-  <distributionManagement>
-    <snapshotRepository>
-      <id>dnet45-snapshots</id>
-      <name>DNet45 Snapshots</name>
-      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
-      <layout>default</layout>
-    </snapshotRepository>
-    <repository>
-      <id>dnet45-releases</id>
-      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
-    </repository>
-  </distributionManagement>
-  <reporting>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-javadoc-plugin</artifactId>
-        <version>2.10.4</version>
-        <configuration>
-          <detectLinks>true</detectLinks>
-        </configuration>
-      </plugin>
-    </plugins>
-  </reporting>
-  <properties>
-    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <spring.version>[4.2.5.RELEASE]</spring.version>
-    <cxf.version>[3.1.5]</cxf.version>
-    <dhp.cdh.version>cdh5.9.0</dhp.cdh.version>
-    <dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
-    <dhp.pig.version>0.12.0-${dhp.cdh.version}</dhp.pig.version>
-    <dhp.avro.version>1.7.6-${dhp.cdh.version}</dhp.avro.version>
-    <dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
-    <dhp.spark.version>1.6.0-${dhp.cdh.version}</dhp.spark.version>
-    <scala.version>2.10.6</scala.version>
-  </properties>
-</project>
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>eu.dnetlib.dhp</groupId>
+  <artifactId>dhp</artifactId>
+  <version>1.0.0-SNAPSHOT</version>
+  <packaging>pom</packaging>
+
+  <url>http://www.d-net.research-infrastructures.eu</url>
+
+  <licenses>
+    <license>
+      <name>The Apache Software License, Version 2.0</name>
+      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+      <distribution>repo</distribution>
+      <comments>A business-friendly OSS license</comments>
+    </license>
+  </licenses>
+
+
+
+
+  <issueManagement>
+    <system>Redmine</system>
+    <url>https://issue.openaire.research-infrastructures.eu/projects/openaire</url>
+  </issueManagement>
+
+  <ciManagement>
+    <system>jenkins</system>
+    <url>https://jenkins-dnet.d4science.org/</url>
+  </ciManagement>
+
+  <scm>
+    <connection>scm:git:ssh://git@github.com/???</connection>
+    <developerConnection>scm:git:ssh://git@github.com/???.git</developerConnection>
+    <url>https://github.com/???</url>
+    <tag>HEAD</tag>
+  </scm>
+
+  <repositories>
+    <repository>
+      <id>dnet45-releases</id>
+      <name>D-Net 45 releases</name>
+      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+      <layout>default</layout>
+      <snapshots><enabled>false</enabled></snapshots>
+      <releases><enabled>true</enabled></releases>
+    </repository>
+    <repository>
+      <id>dnet45-bootstrap-release</id>
+      <name>dnet45 bootstrap release</name>
+      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-bootstrap-release</url>
+      <layout>default</layout>
+      <snapshots><enabled>false</enabled></snapshots>
+      <releases><enabled>true</enabled></releases>
+    </repository>
+    <repository>
+      <id>cloudera</id>
+      <name>Cloudera Repository</name>
+      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
+      <releases><enabled>true</enabled></releases>
+      <snapshots><enabled>false</enabled></snapshots>
+    </repository>
+  </repositories>
+
+  <dependencies>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.12</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${dhp.hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-core_2.11</artifactId>
+        <version>${dhp.spark.version}</version>
+        <scope>provided</scope>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-sql_2.11</artifactId>
+        <version>${dhp.spark.version}</version>
+        <scope>provided</scope>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
+
+  <build>
+    <directory>target</directory>
+    <outputDirectory>target/classes</outputDirectory>
+    <finalName>${project.artifactId}-${project.version}</finalName>
+    <testOutputDirectory>target/test-classes</testOutputDirectory>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>3.6.0</version>
+          <configuration>
+            <source>1.8</source>
+            <target>1.8</target>
+            <encoding>${project.build.sourceEncoding}</encoding>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>3.0.2</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-source-plugin</artifactId>
+          <version>3.0.1</version>
+          <executions>
+            <execution>
+              <id>attach-sources</id>
+              <phase>verify</phase>
+              <goals><goal>jar-no-fork</goal></goals>
+            </execution>
+          </executions>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <version>2.19.1</version>
+          <configuration>
+            <redirectTestOutputToFile>true</redirectTestOutputToFile>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-javadoc-plugin</artifactId>
+          <version>2.10.4</version>
+          <configuration>
+            <detectLinks>true</detectLinks>
+          </configuration>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-dependency-plugin</artifactId>
+          <version>3.0.0</version>
+        </plugin>
+
+
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-release-plugin</artifactId>
+          <version>2.5.3</version>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
+
+  <distributionManagement>
+    <snapshotRepository>
+      <id>dnet45-snapshots</id>
+      <name>DNet45 Snapshots</name>
+      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots</url>
+      <layout>default</layout>
+    </snapshotRepository>
+    <repository>
+      <id>dnet45-releases</id>
+      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-releases</url>
+    </repository>
+  </distributionManagement>
+
+  <reporting>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <version>2.10.4</version>
+        <configuration>
+          <detectLinks>true</detectLinks>
+        </configuration>
+      </plugin>
+    </plugins>
+  </reporting>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+    <dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
+    <dhp.hadoop.version>2.6.0-${dhp.cdh.version}</dhp.hadoop.version>
+    <dhp.spark.version>2.2.0</dhp.spark.version>
+    <scala.version>2.11.8</scala.version>
+  </properties>
+</project>
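
Note on consuming the new parent (illustration only, not part of the diff above): the rewritten root pom pins the Hadoop and Spark artifact versions once, under dependencyManagement, so child modules can declare the managed artifacts without repeating versions or scopes. Below is a minimal sketch of such a child pom under that assumption; the module name dhp-example is hypothetical and does not appear anywhere in this changeset.

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <parent>
        <groupId>eu.dnetlib.dhp</groupId>
        <artifactId>dhp</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>
    <!-- hypothetical module name, for illustration only -->
    <artifactId>dhp-example</artifactId>

    <dependencies>
        <!-- version (${dhp.spark.version}) and the provided scope are inherited
             from the parent's dependencyManagement section -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
        </dependency>
    </dependencies>
</project>

Built with a plain `mvn package` from the reactor root, a module like this would compile against Spark 2.2.0 on Scala 2.11 while leaving the Spark jars to be provided by the cluster at runtime.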