1
0
Fork 0

first commit

This commit is contained in:
Claudio Atzori 2018-01-16 14:21:13 +01:00
parent 2c27a2762d
commit f072ed91b2
132 changed files with 7690 additions and 0 deletions

1
README.md Normal file
View File

@ -0,0 +1 @@
# dnet-hadoop

View File

@ -0,0 +1,7 @@
Module utilized by `dhp-wf`.
Contains all resources required by this parent module:
* assembly XML definitions
* build shell scripts
* oozie package commands for uploading, running and monitoring oozie workflows

View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Module descriptor for the jar that bundles the assembly descriptors and helper scripts shared by the dhp-build parent. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-assembly-resources</artifactId>
<packaging>jar</packaging>
<build>
<plugins>
<!-- NOTE(review): no version or configuration is given here; presumably both are managed by the parent POM. Confirm. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,32 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<!-- Assembles the oozie command scripts into a plain directory: every file is filtered, made executable and given unix line endings. -->
<id>oozie-installer</id>
<formats>
<format>dir</format>
</formats>
<fileSets>
<fileSet>
<filtered>true</filtered>
<directory>${project.build.directory}/assembly-resources/commands</directory>
<!--
works for (local resources of the module):
<directory>src/main/resources</directory>
does not work for:
<directory>classpath:/commands</directory>
<directory>commands</directory>
<directory>classpath/src/main/resources</directory>
-->
<outputDirectory>/</outputDirectory>
<includes>
<include>**/*</include>
</includes>
<fileMode>0755</fileMode>
<lineEnding>unix</lineEnding>
</fileSet>
</fileSets>
<baseDirectory>/</baseDirectory>
</assembly>

View File

@ -0,0 +1,24 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<!-- Packs the content of the test output directory into a jar, without a wrapping base directory. -->
<id>tests</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<!-- NOTE(review): the element value below ends with a line break; presumably it is trimmed when the descriptor is read. Confirm. -->
<directory>${project.build.testOutputDirectory}
</directory>
<outputDirectory />
</fileSet>
</fileSets>
<!-- Disabled: bundling of dependencies under lib. -->
<!-- <dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>lib</outputDirectory>
</dependencySet>
</dependencySets>-->
</assembly>

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Fetches the workflow working directory from the Hadoop file system.
# NOTE(review): the workingDir placeholder is not assigned anywhere in this script;
# presumably it is substituted when the package is assembled. Confirm.
hadoop fs -get ${workingDir}

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Lists the content of the workflow working directory on the Hadoop file system.
# NOTE(review): the workingDir placeholder is not assigned anywhere in this script;
# presumably it is substituted when the package is assembled. Confirm.
echo ""
echo "---->Contents of the working directory"
hadoop fs -ls ${workingDir}

View File

@ -0,0 +1,5 @@
Execute the scripts in the following order:
1. `upload_workflow.sh`
2. `run_workflow.sh`
3. `print_working_dir.sh` or `get_working_dir.sh`

View File

@ -0,0 +1,10 @@
#!/bin/bash
# Submits and starts the oozie workflow described by a job.properties file.
# Usage: run_workflow.sh [directory]
#   directory - optional location of job.properties; the current directory is used when omitted.
# The positional argument is quoted so that a directory path containing spaces is passed as one word.
if [ $# = 0 ] ; then
oozie job -oozie "${oozieServiceLoc}" -config job.properties -run
else
oozie job -oozie "${oozieServiceLoc}" -config "$1/job.properties" -run
fi

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Uploads the oozie application directory to a freshly recreated sandbox on the Hadoop file system.
# Usage: upload_workflow.sh [directory]
#   directory - optional local directory holding the application; the current directory is used when omitted.
# Command tracing is redirected to a copy of stdout (descriptor 3).
exec 3>&1
BASH_XTRACEFD=3
set -x ## print every executed command
# The application directory suffix is kept in single quotes on purpose, exactly as before;
# only the shell expansions are quoted so that paths containing spaces stay single words.
if [ $# = 0 ] ; then
target_dir_root="$(pwd)"'/${oozieAppDir}'
else
target_dir_root="$(readlink -f "$1")"'/${oozieAppDir}'
fi
# initial phase, creating symbolic links to jars in all subworkflows
# currently disabled
#libDir=$target_dir_root'/lib'
#dirs=`find $target_dir_root/* -maxdepth 10 -type d`
#for dir in $dirs
#do
# if [ -f $dir/workflow.xml ]
# then
# echo "creating symbolic links to jars in directory: $dir/lib"
# if [ ! -d "$dir/lib" ]; then
# mkdir $dir/lib
# fi
# find $libDir -type f -exec ln -s \{\} $dir/lib \;
# fi
#done
#uploading
# the sandbox is removed first, so every upload starts from an empty directory
hadoop fs -rm -r "${sandboxDir}"
hadoop fs -mkdir -p "${sandboxDir}"
hadoop fs -mkdir -p "${workingDir}"
hadoop fs -put "$target_dir_root" "${sandboxDir}"

View File

@ -0,0 +1,7 @@
#sandboxName when not provided explicitly will be generated
sandboxName=${sandboxName}
# sandbox directory on the Hadoop file system, located under the /user directory of the frontend user
sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName}
# working directory nested inside the sandbox
workingDir=${sandboxDir}/working_dir
# location of the oozie workflow application: name node + sandbox directory + application directory
oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir}
# same location as the application path above, exposed under a second name
oozieTopWfApplicationPath = ${oozie.wf.application.path}

View File

@ -0,0 +1,32 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<!-- Assembles the oozie command scripts into a plain directory: every file is filtered, made executable and given unix line endings. -->
<id>oozie-installer</id>
<formats>
<format>dir</format>
</formats>
<fileSets>
<fileSet>
<filtered>true</filtered>
<directory>${project.build.directory}/assembly-resources/commands</directory>
<!--
works for (local resources of the module):
<directory>src/main/resources</directory>
does not work for:
<directory>classpath:/commands</directory>
<directory>commands</directory>
<directory>classpath/src/main/resources</directory>
-->
<outputDirectory>/</outputDirectory>
<includes>
<include>**/*</include>
</includes>
<fileMode>0755</fileMode>
<lineEnding>unix</lineEnding>
</fileSet>
</fileSets>
<baseDirectory>/</baseDirectory>
</assembly>

View File

@ -0,0 +1,24 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd">
<!-- Packs the content of the test output directory into a jar, without a wrapping base directory. -->
<id>tests</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<!-- NOTE(review): the element value below ends with a line break; presumably it is trimmed when the descriptor is read. Confirm. -->
<directory>${project.build.testOutputDirectory}
</directory>
<outputDirectory />
</fileSet>
</fileSets>
<!-- Disabled: bundling of dependencies under lib. -->
<!-- <dependencySets>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>lib</outputDirectory>
</dependencySet>
</dependencySets>-->
</assembly>

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Fetches the workflow working directory from the Hadoop file system.
# NOTE(review): the workingDir placeholder is not assigned anywhere in this script;
# presumably it is substituted when the package is assembled. Confirm.
hadoop fs -get ${workingDir}

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Lists the content of the workflow working directory on the Hadoop file system.
# NOTE(review): the workingDir placeholder is not assigned anywhere in this script;
# presumably it is substituted when the package is assembled. Confirm.
echo ""
echo "---->Contents of the working directory"
hadoop fs -ls ${workingDir}

View File

@ -0,0 +1,5 @@
Execute the scripts in the following order:
1. `upload_workflow.sh`
2. `run_workflow.sh`
3. `print_working_dir.sh` or `get_working_dir.sh`

View File

@ -0,0 +1,10 @@
#!/bin/bash
# Submits and starts the oozie workflow described by a job.properties file.
# Usage: run_workflow.sh [directory]
#   directory - optional location of job.properties; the current directory is used when omitted.
# The positional argument is quoted so that a directory path containing spaces is passed as one word.
if [ $# = 0 ] ; then
oozie job -oozie "${oozieServiceLoc}" -config job.properties -run
else
oozie job -oozie "${oozieServiceLoc}" -config "$1/job.properties" -run
fi

View File

@ -0,0 +1,34 @@
#!/bin/bash
# Uploads the oozie application directory to a freshly recreated sandbox on the Hadoop file system.
# Usage: upload_workflow.sh [directory]
#   directory - optional local directory holding the application; the current directory is used when omitted.
# Command tracing is redirected to a copy of stdout (descriptor 3).
exec 3>&1
BASH_XTRACEFD=3
set -x ## print every executed command
# The application directory suffix is kept in single quotes on purpose, exactly as before;
# only the shell expansions are quoted so that paths containing spaces stay single words.
if [ $# = 0 ] ; then
target_dir_root="$(pwd)"'/${oozieAppDir}'
else
target_dir_root="$(readlink -f "$1")"'/${oozieAppDir}'
fi
# initial phase, creating symbolic links to jars in all subworkflows
# currently disabled
#libDir=$target_dir_root'/lib'
#dirs=`find $target_dir_root/* -maxdepth 10 -type d`
#for dir in $dirs
#do
# if [ -f $dir/workflow.xml ]
# then
# echo "creating symbolic links to jars in directory: $dir/lib"
# if [ ! -d "$dir/lib" ]; then
# mkdir $dir/lib
# fi
# find $libDir -type f -exec ln -s \{\} $dir/lib \;
# fi
#done
#uploading
# the sandbox is removed first, so every upload starts from an empty directory
hadoop fs -rm -r "${sandboxDir}"
hadoop fs -mkdir -p "${sandboxDir}"
hadoop fs -mkdir -p "${workingDir}"
hadoop fs -put "$target_dir_root" "${sandboxDir}"

View File

@ -0,0 +1,7 @@
#sandboxName when not provided explicitly will be generated
sandboxName=${sandboxName}
# sandbox directory on the Hadoop file system, located under the /user directory of the frontend user
sandboxDir=/user/${iis.hadoop.frontend.user.name}/${sandboxName}
# working directory nested inside the sandbox
workingDir=${sandboxDir}/working_dir
# location of the oozie workflow application: name node + sandbox directory + application directory
oozie.wf.application.path = ${nameNode}${sandboxDir}/${oozieAppDir}
# same location as the application path above, exposed under a second name
oozieTopWfApplicationPath = ${oozie.wf.application.path}

View File

@ -0,0 +1,6 @@
Maven plugin module utilized by `dhp-wf` for proper `job.properties` file building.
It is based on http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html and supplemented with:
* handling of the `includePropertyKeysFromFiles` property, which allows writing only the properties listed in the given property files
As a final outcome, only the properties listed in the `<include>` element and the properties listed as keys in the files from the `<includePropertyKeysFromFiles>` element will be written to the output file.

View File

@ -0,0 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Module descriptor of the Maven plugin that builds the job.properties file. -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<packaging>maven-plugin</packaging>
<!-- NOTE(review): the plugin sources also import commons-lang, commons-io, Spring core and findbugs annotations, -->
<!-- and the tests import JUnit and Mockito; none are declared here, so presumably they arrive transitively or from the parent POM. Confirm. -->
<dependencies>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-project</artifactId>
<version>2.0</version>
</dependency>
<!-- the plugin this module is based on -->
<dependency>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<version>1.3.2</version>
</dependency>
</dependencies>
<build>
<directory>target</directory>
<outputDirectory>target/classes</outputDirectory>
<finalName>${project.artifactId}-${project.version}</finalName>
<testOutputDirectory>target/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<!-- attaches a sources jar during the verify phase -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<configuration>
<detectLinks>true</detectLinks>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,71 @@
package eu.dnetlib.maven.plugin.properties;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
/**
* Generates oozie properties which were not provided from commandline.
* @author mhorst
*
* @goal generate-properties
*/
public class GenerateOoziePropertiesMojo extends AbstractMojo {

    /** Name of the system property holding the workflow source directory. */
    public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir";

    /** Name of the system property holding the sandbox name. */
    public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName";

    /** Directory names that end the sandbox name when the source path is walked from its last element. */
    private final String[] limiters = {"iis", "dnetlib", "eu", "dhp"};

    /**
     * Derives the sandbox name system property from the workflow source directory system property.
     * Nothing happens when the source directory property is absent or the sandbox name is already set.
     * When no name can be derived, the sandbox name is left unset and a warning is logged.
     */
    @Override
    public void execute() throws MojoExecutionException, MojoFailureException {
        if (!System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
                || System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
            return;
        }
        String wfSourceDir = System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR);
        String generatedSandboxName = generateSandboxName(wfSourceDir);
        if (generatedSandboxName != null) {
            System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME, generatedSandboxName);
        } else {
            // reported through the plugin logger rather than raw standard output
            getLog().warn("unable to generate sandbox name from path: " + wfSourceDir);
        }
    }

    /**
     * Generates sandbox name from workflow source directory.
     * The trailing path elements are collected, last to first, until one of the limiters is met;
     * the collected elements are joined with the platform file separator in their original order.
     * @param wfSourceDir workflow source directory, split on the platform file separator
     * @return generated sandbox name, or null when the path has no elements or ends with a limiter
     */
    private String generateSandboxName(String wfSourceDir) {
        String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
        // split yields null for a null input and an empty array for an empty one
        if (tokens == null || tokens.length == 0) {
            return null;
        }
        List<String> sandboxNameParts = new ArrayList<String>();
        for (int i = tokens.length - 1; i >= 0; i--) {
            if (ArrayUtils.contains(limiters, tokens[i])) {
                break;
            }
            sandboxNameParts.add(0, tokens[i]);
        }
        if (sandboxNameParts.isEmpty()) {
            return null;
        }
        return StringUtils.join(sandboxNameParts.toArray(), File.separator);
    }
}

View File

@ -0,0 +1,436 @@
/**
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.opensource.org/licenses/ecl2.php
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.dnetlib.maven.plugin.properties;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
import org.apache.maven.project.MavenProject;
import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Writes project properties for the keys listed in specified properties files.
* Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
* @author mhorst
* @goal write-project-properties
*/
public class WritePredefinedProjectProperties extends AbstractMojo {
// Characters that the escape token names "cr", "lf" and "tab" are resolved to.
private static final String CR = "\r";
private static final String LF = "\n";
private static final String TAB = "\t";
// Prefix put in front of environment variable names when they are exposed as properties.
protected static final String PROPERTY_PREFIX_ENV = "env.";
// Charset name used when the output file is written.
private static final String ENCODING_UTF8 = "utf8";
/**
* @parameter property="properties.includePropertyKeysFromFiles"
*/
private String[] includePropertyKeysFromFiles;
/**
* @parameter default-value="${project}"
* @required
* @readonly
*/
protected MavenProject project;
/**
* The file that properties will be written to
*
* @parameter property="properties.outputFile"
* default-value="${project.build.directory}/properties/project.properties";
* @required
*/
protected File outputFile;
/**
* If true, the plugin will silently ignore any non-existent properties files, and the build will continue
*
* @parameter property="properties.quiet" default-value="true"
*/
private boolean quiet;
/**
* Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed,
* tab=tab. Any other values are taken literally.
*
* @parameter default-value="cr,lf,tab" property="properties.escapeChars"
*/
private String escapeChars;
/**
* If true, the plugin will include system properties when writing the properties file. System properties override
* both environment variables and project properties.
*
* @parameter default-value="false" property="properties.includeSystemProperties"
*/
private boolean includeSystemProperties;
/**
* If true, the plugin will include environment variables when writing the properties file. Environment variables
* are prefixed with "env". Environment variables override project properties.
*
* @parameter default-value="false" property="properties.includeEnvironmentVariables"
*/
private boolean includeEnvironmentVariables;
/**
* Comma separated set of properties to exclude when writing the properties file
*
* @parameter property="properties.exclude"
*/
private String exclude;
/**
* Comma separated set of properties to write to the properties file. If provided, only the properties matching
* those supplied here will be written to the properties file.
*
* @parameter property="properties.include"
*/
private String include;
/**
 * Gathers the project properties, optionally overlays environment variables and then
 * system properties, filters the result and writes it to the output file.
 * @see org.apache.maven.plugin.AbstractMojo#execute()
 */
@Override
@SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"})
public void execute() throws MojoExecutionException, MojoFailureException {
    Properties merged = new Properties();
    merged.putAll(project.getProperties());

    // later sources override earlier ones on key clashes
    if (includeEnvironmentVariables) {
        merged.putAll(getEnvironmentVariables());
    }
    if (includeSystemProperties) {
        merged.putAll(System.getProperties());
    }

    // drop excluded keys and, when an include list exists, everything not on it
    trim(merged, exclude, include);

    String header = "# " + new Date() + "\n";
    List<String> tokensToEscape = getEscapeChars(escapeChars);
    getLog().info("Creating " + outputFile);
    writeProperties(outputFile, header, merged, tokensToEscape);
}
/**
 * Exposes the process environment as properties.
 * @return one property per environment variable, keyed by the variable name with the "env." prefix
 */
protected static Properties getEnvironmentVariables() {
    Properties environment = new Properties();
    for (Entry<String, String> variable : System.getenv().entrySet()) {
        String prefixedName = PROPERTY_PREFIX_ENV + variable.getKey();
        environment.setProperty(prefixedName, variable.getValue());
    }
    return environment;
}
/**
 * Removes the properties which should not be written.
 * Keys listed in omitCSV are removed first. The include list is then built from includeCSV
 * plus the keys of every valid file configured in includePropertyKeysFromFiles; when that list
 * is not empty, every property whose key is not on it is removed as well.
 * @param properties properties to filter in place
 * @param omitCSV comma separated keys to remove
 * @param includeCSV comma separated keys to keep
 * @throws MojoExecutionException when a configured file is missing and quiet mode is off, or a file cannot be read
 */
protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException {
    for (String excludedKey : getListFromCSV(omitCSV)) {
        properties.remove(excludedKey);
    }

    List<String> allowedKeys = getListFromCSV(includeCSV);
    // mh: including keys from predefined properties
    if (includePropertyKeysFromFiles != null) {
        for (String location : includePropertyKeysFromFiles) {
            if (!validate(location)) {
                continue;
            }
            allowedKeys.addAll(getProperties(location).stringPropertyNames());
        }
    }

    // without any include key nothing more is removed
    if (allowedKeys.isEmpty()) {
        return;
    }
    for (String candidate : properties.stringPropertyNames()) {
        if (!allowedKeys.contains(candidate)) {
            properties.remove(candidate);
        }
    }
}
/**
 * Checks whether the given location can be found, first as a file and then as a
 * resource resolved by a Spring DefaultResourceLoader.
 * @param location file path or resource location
 * @return true when exists, false otherwise (a blank location never exists)
 */
protected boolean exists(String location) {
    if (StringUtils.isBlank(location)) {
        return false;
    }
    if (new File(location).exists()) {
        return true;
    }
    return new DefaultResourceLoader().getResource(location).exists();
}
/**
 * Validates resource location.
 * @param location file path or resource location
 * @return true when the location exists, false when it does not and quiet mode is on
 * @throws MojoExecutionException when the location does not exist and quiet mode is off
 */
protected boolean validate(String location) throws MojoExecutionException {
    if (exists(location)) {
        return true;
    }
    if (!quiet) {
        throw new MojoExecutionException("Non-existent properties file '" + location + "'");
    }
    getLog().info("Ignoring non-existent properties file '" + location + "'");
    return false;
}
/**
 * Opens the given location for reading. An existing file is read directly; anything else is
 * resolved by a Spring DefaultResourceLoader.
 * @param location file path or resource location
 * @return input stream; the caller is responsible for closing it
 * @throws IOException when the stream cannot be opened
 */
protected InputStream getInputStream(String location) throws IOException {
    if (new File(location).exists()) {
        return new FileInputStream(location);
    }
    Resource fallbackResource = new DefaultResourceLoader().getResource(location);
    return fallbackResource.getInputStream();
}
/**
 * Loads properties from the given location. Locations ending with ".xml" (case-insensitive)
 * are read in the XML properties format, all others in the plain properties format.
 * @param location file path or resource location
 * @return properties for given location
 * @throws MojoExecutionException when the location cannot be read
 */
protected Properties getProperties(String location) throws MojoExecutionException {
    Properties loaded = new Properties();
    InputStream stream = null;
    try {
        stream = getInputStream(location);
        boolean xmlFormat = location.toLowerCase().endsWith(".xml");
        if (xmlFormat) {
            loaded.loadFromXML(stream);
        } else {
            loaded.load(stream);
        }
    } catch (IOException e) {
        throw new MojoExecutionException("Error reading properties file " + location, e);
    } finally {
        IOUtils.closeQuietly(stream);
    }
    return loaded;
}
/**
 * Turns the comma separated escape configuration into the actual tokens to escape.
 * @param escapeChars comma separated token names
 * @return escape tokens, with every token name resolved by getRealToken
 */
protected List<String> getEscapeChars(String escapeChars) {
    List<String> resolved = new ArrayList<String>();
    for (String tokenName : getListFromCSV(escapeChars)) {
        resolved.add(getRealToken(tokenName));
    }
    return resolved;
}
/**
 * Resolves a token name to the text it stands for.
 * @param token token name; "cr", "lf" and "tab" are recognized regardless of case
 * @return the matching control character, or the token itself when it is not a recognized name
 */
protected String getRealToken(String token) {
    if (token.equalsIgnoreCase("CR")) {
        return CR;
    }
    if (token.equalsIgnoreCase("LF")) {
        return LF;
    }
    if (token.equalsIgnoreCase("TAB")) {
        return TAB;
    }
    return token;
}
/**
 * Renders the properties file content: the comment (when not blank) followed by one
 * "name=value" line per property, sorted by name, with values escaped.
 * @param comment text placed before the first property; appended as given
 * @param properties properties to render
 * @param escapeTokens tokens to escape inside values
 * @return content
 */
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
    List<String> sortedKeys = new ArrayList<String>(properties.stringPropertyNames());
    Collections.sort(sortedKeys);

    StringBuilder content = new StringBuilder();
    if (!StringUtils.isBlank(comment)) {
        content.append(comment);
    }
    for (String key : sortedKeys) {
        String escaped = escape(properties.getProperty(key), escapeTokens);
        content.append(key).append("=").append(escaped).append("\n");
    }
    return content.toString();
}
/**
 * Writes properties to given file.
 * @param file target file
 * @param comment text placed before the first property
 * @param properties properties to write
 * @param escapeTokens tokens to escape inside values
 * @throws MojoExecutionException when the file cannot be written
 */
protected void writeProperties(File file, String comment, Properties properties, List<String> escapeTokens)
        throws MojoExecutionException {
    try {
        FileUtils.writeStringToFile(file, getContent(comment, properties, escapeTokens), ENCODING_UTF8);
    } catch (IOException e) {
        throw new MojoExecutionException("Error creating properties file", e);
    }
}
/**
 * Escapes characters.
 * Every occurrence of each escape token is replaced, token by token in list order, with its replacement token.
 * @param s text to escape
 * @param escapeChars tokens to replace
 * @return the text with all tokens replaced
 */
protected String escape(String s, List<String> escapeChars) {
    String escaped = s;
    for (String target : escapeChars) {
        String replacement = getReplacementToken(target);
        escaped = escaped.replace(target, replacement);
    }
    return escaped;
}
/**
 * Provides replacement token.
 * @param escapeChar token to be escaped
 * @return backslash-letter form for carriage return, line feed and tab; otherwise the token prefixed with a backslash
 */
protected String getReplacementToken(String escapeChar) {
    if (escapeChar.equals(CR)) {
        return "\\r";
    }
    if (escapeChar.equals(LF)) {
        return "\\n";
    }
    if (escapeChar.equals(TAB)) {
        return "\\t";
    }
    return "\\" + escapeChar;
}
/**
 * Returns list from csv.
 * @param csv comma separated values; may be null or blank
 * @return modifiable list of the trimmed values, empty when csv is blank
 */
protected static final List<String> getListFromCSV(String csv) {
    List<String> values = new ArrayList<String>();
    if (StringUtils.isBlank(csv)) {
        return values;
    }
    for (String rawValue : StringUtils.split(csv, ",")) {
        values.add(rawValue.trim());
    }
    return values;
}
/**
 * Sets whether system properties are added to the written properties.
 * @param includeSystemProperties true to include system properties
 */
public void setIncludeSystemProperties(boolean includeSystemProperties) {
this.includeSystemProperties = includeSystemProperties;
}
/**
 * Sets the comma separated list of tokens to escape in property values.
 * @param escapeChars comma separated tokens
 */
public void setEscapeChars(String escapeChars) {
this.escapeChars = escapeChars;
}
/**
 * Sets whether environment variables are added to the written properties.
 * @param includeEnvironmentVariables true to include environment variables
 */
public void setIncludeEnvironmentVariables(boolean includeEnvironmentVariables) {
this.includeEnvironmentVariables = includeEnvironmentVariables;
}
/**
 * Sets the comma separated keys of the properties to leave out.
 * @param exclude comma separated keys
 */
public void setExclude(String exclude) {
this.exclude = exclude;
}
/**
 * Sets the comma separated keys of the properties to write.
 * @param include comma separated keys
 */
public void setInclude(String include) {
this.include = include;
}
/**
 * Sets whether non-existent properties files are ignored instead of failing the build.
 * @param quiet true to ignore non-existent files
 */
public void setQuiet(boolean quiet) {
this.quiet = quiet;
}
/**
 * Sets property files for which keys properties should be included.
 * A copy of the array is stored; a null argument leaves the current value untouched.
 * @param includePropertyKeysFromFiles locations of the property files
 */
public void setIncludePropertyKeysFromFiles(
        String[] includePropertyKeysFromFiles) {
    if (includePropertyKeysFromFiles == null) {
        return;
    }
    this.includePropertyKeysFromFiles = includePropertyKeysFromFiles.clone();
}
}

View File

@ -0,0 +1,101 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests of the sandbox name generation performed by {@link GenerateOoziePropertiesMojo}.
 * Both system properties used by the mojo are cleared before every test.
 * @author mhorst
 */
public class GenerateOoziePropertiesMojoTest {

    private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();

    @Before
    public void clearSystemProperties() {
        System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
        System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
    }

    /** No source directory property: nothing is generated. */
    @Test
    public void testExecuteEmpty() throws Exception {
        mojo.execute();

        assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
    }

    /** An explicitly provided sandbox name is never overwritten. */
    @Test
    public void testExecuteSandboxNameAlreadySet() throws Exception {
        String explicitName = "originalSandboxName";
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, "eu/dnetlib/iis/wf/transformers");
        System.setProperty(PROPERTY_NAME_SANDBOX_NAME, explicitName);

        mojo.execute();

        assertEquals(explicitName, System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
    }

    /** An empty source directory yields no sandbox name. */
    @Test
    public void testExecuteEmptyWorkflowSourceDir() throws Exception {
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, "");

        mojo.execute();

        assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
    }

    /** A path ending with a limiter directory yields no sandbox name. */
    @Test
    public void testExecuteNullSandboxNameGenerated() throws Exception {
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, "eu/dnetlib/iis/");

        mojo.execute();

        assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
    }

    /** Only the path elements following the last limiter directory form the name. */
    @Test
    public void testExecute() throws Exception {
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, "eu/dnetlib/iis/wf/transformers");

        mojo.execute();

        assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
    }

    /** Without any limiter directory the whole path becomes the name. */
    @Test
    public void testExecuteWithoutRoot() throws Exception {
        System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, "wf/transformers");

        mojo.execute();

        assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
    }
}

View File

@ -0,0 +1,365 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.doReturn;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.runners.MockitoJUnitRunner;
/**
* @author mhorst
*
*/
@RunWith(MockitoJUnitRunner.class)
public class WritePredefinedProjectPropertiesTest {
// temporary folder provided by the JUnit rule
@Rule
public TemporaryFolder testFolder = new TemporaryFolder();
// mocked Maven project handed to the mojo; its getProperties() result is stubbed by the tests
@Mock
private MavenProject mavenProject;
// mojo under test, created anew in init()
private WritePredefinedProjectProperties mojo;
/**
 * Creates a fresh mojo wired with the mocked project, which reports no properties by default.
 */
@Before
public void init() {
mojo = new WritePredefinedProjectProperties();
mojo.outputFile = getPropertiesFileLocation();
mojo.project = mavenProject;
// default stubbing; individual tests replace it with their own properties
doReturn(new Properties()).when(mavenProject).getProperties();
}
// ----------------------------------- TESTS ---------------------------------------------
/** A project without properties produces an existing but empty output file. */
@Test
public void testExecuteEmpty() throws Exception {
    mojo.execute();

    assertTrue(mojo.outputFile.exists());
    Properties written = getStoredProperties();
    assertEquals(0, written.size());
}
/** A single project property is written unchanged. */
@Test
public void testExecuteWithProjectProperties() throws Exception {
    String propertyName = "projectPropertyKey";
    String propertyValue = "projectPropertyValue";
    Properties fromProject = new Properties();
    fromProject.setProperty(propertyName, propertyValue);
    doReturn(fromProject).when(mavenProject).getProperties();

    mojo.execute();

    assertTrue(mojo.outputFile.exists());
    Properties written = getStoredProperties();
    assertEquals(1, written.size());
    assertTrue(written.containsKey(propertyName));
    assertEquals(propertyValue, written.getProperty(propertyName));
}
/** Pointing the output file at a directory makes the execution fail. */
@Test(expected=MojoExecutionException.class)
public void testExecuteWithProjectPropertiesAndInvalidOutputFile() throws Exception {
    Properties fromProject = new Properties();
    fromProject.setProperty("projectPropertyKey", "projectPropertyValue");
    doReturn(fromProject).when(mavenProject).getProperties();
    // the root of the temporary folder is a directory, not a writable file
    mojo.outputFile = testFolder.getRoot();

    mojo.execute();
}
/** An excluded key is left out while the remaining property is written. */
@Test
public void testExecuteWithProjectPropertiesExclusion() throws Exception {
    String keptName = "projectPropertyKey";
    String keptValue = "projectPropertyValue";
    String droppedName = "excludedPropertyKey";
    Properties fromProject = new Properties();
    fromProject.setProperty(keptName, keptValue);
    fromProject.setProperty(droppedName, "excludedPropertyValue");
    doReturn(fromProject).when(mavenProject).getProperties();
    mojo.setExclude(droppedName);

    mojo.execute();

    assertTrue(mojo.outputFile.exists());
    Properties written = getStoredProperties();
    assertEquals(1, written.size());
    assertTrue(written.containsKey(keptName));
    assertEquals(keptValue, written.getProperty(keptName));
}
/** With an include list only the listed key is written. */
@Test
public void testExecuteWithProjectPropertiesInclusion() throws Exception {
    String listedName = "includedPropertyKey";
    String listedValue = "includedPropertyValue";
    Properties fromProject = new Properties();
    fromProject.setProperty("projectPropertyKey", "projectPropertyValue");
    fromProject.setProperty(listedName, listedValue);
    doReturn(fromProject).when(mavenProject).getProperties();
    mojo.setInclude(listedName);

    mojo.execute();

    assertTrue(mojo.outputFile.exists());
    Properties written = getStoredProperties();
    assertEquals(1, written.size());
    assertTrue(written.containsKey(listedName));
    assertEquals(listedValue, written.getProperty(listedName));
}
/**
 * Keys read from a properties file on disk act as the include list: only the project
 * property whose key appears in that file is written, with the project's value.
 */
@Test
public void testExecuteIncludingPropertyKeysFromFile() throws Exception {
    // given
    String key = "projectPropertyKey";
    String value = "projectPropertyValue";
    String includedKey = "includedPropertyKey";
    String includedValue = "includedPropertyValue";
    Properties projectProperties = new Properties();
    projectProperties.setProperty(key, value);
    projectProperties.setProperty(includedKey, includedValue);
    doReturn(projectProperties).when(mavenProject).getProperties();
    File includedPropertiesFile = new File(testFolder.getRoot(), "included.properties");
    Properties includedProperties = new Properties();
    includedProperties.setProperty(includedKey, "irrelevantValue");
    // store() leaves the writer open, so it is closed here to release the file handle
    FileWriter writer = new FileWriter(includedPropertiesFile);
    try {
        includedProperties.store(writer, null);
    } finally {
        writer.close();
    }
    mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
    // execute
    mojo.execute();
    // assert
    assertTrue(mojo.outputFile.exists());
    Properties storedProperties = getStoredProperties();
    assertEquals(1, storedProperties.size());
    assertTrue(storedProperties.containsKey(includedKey));
    assertEquals(includedValue, storedProperties.getProperty(includedKey));
}
@Test
public void testExecuteIncludingPropertyKeysFromClasspathResource() throws Exception {
    // given: two project properties; the keys to include are listed in a resource
    // (per the test name, the location is expected to be resolved from the classpath)
    final String otherName = "projectPropertyKey";
    final String otherContent = "projectPropertyValue";
    final String selectedName = "includedPropertyKey";
    final String selectedContent = "includedPropertyValue";

    final Properties declared = new Properties();
    declared.setProperty(otherName, otherContent);
    declared.setProperty(selectedName, selectedContent);
    doReturn(declared).when(mavenProject).getProperties();

    final String[] keySources = {"/eu/dnetlib/maven/plugin/properties/included.properties"};
    mojo.setIncludePropertyKeysFromFiles(keySources);

    // when
    mojo.execute();

    // then: only the included property ends up in the output file
    assertTrue(mojo.outputFile.exists());
    final Properties written = getStoredProperties();
    assertEquals(1, written.size());
    assertTrue(written.containsKey(selectedName));
    assertEquals(selectedContent, written.getProperty(selectedName));
}
@Test(expected=MojoExecutionException.class)
public void testExecuteIncludingPropertyKeysFromBlankLocation() throws Exception {
    // given: two project properties and an empty string as the only key-file location
    final String otherName = "projectPropertyKey";
    final String otherContent = "projectPropertyValue";
    final String selectedName = "includedPropertyKey";
    final String selectedContent = "includedPropertyValue";

    final Properties declared = new Properties();
    declared.setProperty(otherName, otherContent);
    declared.setProperty(selectedName, selectedContent);
    doReturn(declared).when(mavenProject).getProperties();

    final String[] keySources = {""};
    mojo.setIncludePropertyKeysFromFiles(keySources);

    // when: execution is expected to fail with MojoExecutionException
    mojo.execute();
}
@Test
public void testExecuteIncludingPropertyKeysFromXmlFile() throws Exception {
    // given: two project properties and an XML properties file listing the key to include
    String key = "projectPropertyKey";
    String value = "projectPropertyValue";
    String includedKey = "includedPropertyKey";
    String includedValue = "includedPropertyValue";
    Properties projectProperties = new Properties();
    projectProperties.setProperty(key, value);
    projectProperties.setProperty(includedKey, includedValue);
    doReturn(projectProperties).when(mavenProject).getProperties();
    File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml");
    Properties includedProperties = new Properties();
    // only the key matters here; the assertions below expect the project's value
    includedProperties.setProperty(includedKey, "irrelevantValue");
    // Properties.storeToXML leaves the stream open, so close it here to avoid leaking the file handle
    try (FileOutputStream out = new FileOutputStream(includedPropertiesFile)) {
        includedProperties.storeToXML(out, null);
    }
    mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
    // execute
    mojo.execute();
    // assert: exactly the included key is written, carrying the project's value
    assertTrue(mojo.outputFile.exists());
    Properties storedProperties = getStoredProperties();
    assertEquals(1, storedProperties.size());
    assertTrue(storedProperties.containsKey(includedKey));
    assertEquals(includedValue, storedProperties.getProperty(includedKey));
}
@Test(expected=MojoExecutionException.class)
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile() throws Exception {
    // given: a file with an .xml name whose content is written in plain properties format
    String key = "projectPropertyKey";
    String value = "projectPropertyValue";
    String includedKey = "includedPropertyKey";
    String includedValue = "includedPropertyValue";
    Properties projectProperties = new Properties();
    projectProperties.setProperty(key, value);
    projectProperties.setProperty(includedKey, includedValue);
    doReturn(projectProperties).when(mavenProject).getProperties();
    File includedPropertiesFile = new File(testFolder.getRoot(), "included.xml");
    Properties includedProperties = new Properties();
    includedProperties.setProperty(includedKey, "irrelevantValue");
    // deliberately store(), not storeToXML(); the stream is closed here because store() leaves it open
    try (FileOutputStream out = new FileOutputStream(includedPropertiesFile)) {
        includedProperties.store(out, null);
    }
    mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
    // execute: expected to fail with MojoExecutionException
    mojo.execute();
}
@Test
public void testExecuteWithQuietModeOn() throws Exception {
    // given: quiet mode combined with a key-file location that cannot be resolved
    final String[] keySources = {"invalid location"};
    mojo.setQuiet(true);
    mojo.setIncludePropertyKeysFromFiles(keySources);

    // when
    mojo.execute();

    // then: no exception is raised and an empty properties file is produced
    assertTrue(mojo.outputFile.exists());
    final Properties written = getStoredProperties();
    assertEquals(0, written.size());
}
@Test(expected=MojoExecutionException.class)
public void testExecuteIncludingPropertyKeysFromInvalidFile() throws Exception {
    // given: a key-file location that cannot be resolved (quiet mode is not enabled by this test)
    final String[] keySources = {"invalid location"};
    mojo.setIncludePropertyKeysFromFiles(keySources);

    // when: execution is expected to fail with MojoExecutionException
    mojo.execute();
}
@Test
public void testExecuteWithEnvironmentProperties() throws Exception {
    // given: environment variables are requested in the output
    mojo.setIncludeEnvironmentVariables(true);

    // when
    mojo.execute();

    // then: something was written and every stored key carries the environment prefix
    assertTrue(mojo.outputFile.exists());
    final Properties written = getStoredProperties();
    assertTrue(written.size() > 0);
    for (Object rawName : written.keySet()) {
        final String name = (String) rawName;
        assertTrue(name.startsWith(PROPERTY_PREFIX_ENV));
    }
}
@Test
public void testExecuteWithSystemProperties() throws Exception {
    // given: a JVM system property and the mojo configured to include system properties
    String key = "systemPropertyKey";
    String value = "systemPropertyValue";
    System.setProperty(key, value);
    try {
        mojo.setIncludeSystemProperties(true);
        // execute
        mojo.execute();
        // assert: the system property is present in the written file
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties();
        assertTrue(storedProperties.size() > 0);
        assertTrue(storedProperties.containsKey(key));
        assertEquals(value, storedProperties.getProperty(key));
    } finally {
        // system properties are JVM-wide; remove the test key so it cannot leak into other tests
        System.clearProperty(key);
    }
}
@Test
public void testExecuteWithSystemPropertiesAndEscapeChars() throws Exception {
    // given: a system property whose key has a trailing space, plus a custom escape-character list
    String key = "systemPropertyKey ";
    String value = "systemPropertyValue";
    System.setProperty(key, value);
    try {
        mojo.setIncludeSystemProperties(true);
        String escapeChars = "cr,lf,tab,|";
        mojo.setEscapeChars(escapeChars);
        // execute
        mojo.execute();
        // assert: the value is found under the trimmed key, not under the original key
        assertTrue(mojo.outputFile.exists());
        Properties storedProperties = getStoredProperties();
        assertTrue(storedProperties.size() > 0);
        assertFalse(storedProperties.containsKey(key));
        assertTrue(storedProperties.containsKey(key.trim()));
        assertEquals(value, storedProperties.getProperty(key.trim()));
    } finally {
        // system properties are JVM-wide; remove the test key so it cannot leak into other tests
        System.clearProperty(key);
    }
}
// ----------------------------------- PRIVATE -------------------------------------------
/**
 * Returns the location of the "test.properties" file inside the temporary test folder.
 */
private File getPropertiesFileLocation() {
    final File parentDirectory = testFolder.getRoot();
    final String fileName = "test.properties";
    return new File(parentDirectory, fileName);
}
/**
 * Loads the properties file found at {@link #getPropertiesFileLocation()}.
 *
 * @return properties read from that file
 * @throws FileNotFoundException when the file cannot be opened for reading
 * @throws IOException when reading the file fails
 */
private Properties getStoredProperties() throws FileNotFoundException, IOException {
    Properties properties = new Properties();
    // Properties.load leaves the stream open, so close it here to avoid leaking the file handle
    try (FileInputStream input = new FileInputStream(getPropertiesFileLocation())) {
        properties.load(input);
    }
    return properties;
}
}

View File

@ -0,0 +1,281 @@
<?xml version="1.0" encoding="UTF-8"?>
<plugin>
<name>dhp-build-properties-maven-plugin</name>
<description></description>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-build-properties-maven-plugin</artifactId>
<version>1.0.0-SNAPSHOT</version>
<goalPrefix>dhp-build-properties</goalPrefix>
<isolatedRealm>false</isolatedRealm>
<inheritedByDefault>true</inheritedByDefault>
<mojos>
<mojo>
<goal>generate-properties</goal>
<description>Generates oozie properties which were not provided from commandline.</description>
<requiresDirectInvocation>false</requiresDirectInvocation>
<requiresProject>true</requiresProject>
<requiresReports>false</requiresReports>
<aggregator>false</aggregator>
<requiresOnline>false</requiresOnline>
<inheritedByDefault>true</inheritedByDefault>
<implementation>eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo</implementation>
<language>java</language>
<instantiationStrategy>per-lookup</instantiationStrategy>
<executionStrategy>once-per-session</executionStrategy>
<threadSafe>false</threadSafe>
<parameters/>
</mojo>
<mojo>
<goal>write-project-properties</goal>
<description>Writes project properties for the keys listed in specified properties files.
Based on:
http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html</description>
<requiresDirectInvocation>false</requiresDirectInvocation>
<requiresProject>true</requiresProject>
<requiresReports>false</requiresReports>
<aggregator>false</aggregator>
<requiresOnline>false</requiresOnline>
<inheritedByDefault>true</inheritedByDefault>
<implementation>eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties</implementation>
<language>java</language>
<instantiationStrategy>per-lookup</instantiationStrategy>
<executionStrategy>once-per-session</executionStrategy>
<threadSafe>false</threadSafe>
<parameters>
<parameter>
<name>properties.escapeChars</name>
<type>java.lang.String</type>
<required>false</required>
<editable>true</editable>
<description>Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed,
tab=tab. Any other values are taken literally.</description>
</parameter>
<parameter>
<name>properties.exclude</name>
<type>java.lang.String</type>
<required>false</required>
<editable>true</editable>
<description>Comma separated set of properties to exclude when writing the properties file</description>
</parameter>
<parameter>
<name>properties.include</name>
<type>java.lang.String</type>
<required>false</required>
<editable>true</editable>
<description>Comma separated set of properties to write to the properties file. If provided, only the properties matching
those supplied here will be written to the properties file.</description>
</parameter>
<parameter>
<name>properties.includeEnvironmentVariables</name>
<type>boolean</type>
<required>false</required>
<editable>true</editable>
<description>If true, the plugin will include environment variables when writing the properties file. Environment variables
are prefixed with &quot;env&quot;. Environment variables override project properties.</description>
</parameter>
<parameter>
<name>properties.includePropertyKeysFromFiles</name>
<type>java.lang.String[]</type>
<required>false</required>
<editable>true</editable>
<description></description>
</parameter>
<parameter>
<name>properties.includeSystemProperties</name>
<type>boolean</type>
<required>false</required>
<editable>true</editable>
<description>If true, the plugin will include system properties when writing the properties file. System properties override
both environment variables and project properties.</description>
</parameter>
<parameter>
<name>properties.outputFile</name>
<type>java.io.File</type>
<required>true</required>
<editable>true</editable>
<description>The file that properties will be written to</description>
</parameter>
<parameter>
<name>project</name>
<type>org.apache.maven.project.MavenProject</type>
<required>true</required>
<editable>false</editable>
<description></description>
</parameter>
<parameter>
<name>properties.quiet</name>
<type>boolean</type>
<required>false</required>
<editable>true</editable>
<description>If true, the plugin will silently ignore any non-existent properties files, and the build will continue</description>
</parameter>
</parameters>
<configuration>
<properties.escapeChars implementation="java.lang.String" default-value="cr,lf,tab"/>
<properties.includeEnvironmentVariables implementation="boolean" default-value="false"/>
<properties.includeSystemProperties implementation="boolean" default-value="false"/>
<properties.outputFile implementation="java.io.File" default-value="${project.build.directory}/properties/project.properties"/>
<project implementation="org.apache.maven.project.MavenProject" default-value="${project}"/>
<properties.quiet implementation="boolean" default-value="true"/>
</configuration>
</mojo>
</mojos>
<dependencies>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-plugin-api</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-project</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-profile</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-model</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-artifact-manager</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-repository-metadata</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.apache.maven.wagon</groupId>
<artifactId>wagon-provider-api</artifactId>
<type>jar</type>
<version>1.0-alpha-5</version>
</dependency>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-utils</artifactId>
<type>jar</type>
<version>1.0.4</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-artifact</artifactId>
<type>jar</type>
<version>2.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-container-default</artifactId>
<type>jar</type>
<version>1.0-alpha-8</version>
</dependency>
<dependency>
<groupId>classworlds</groupId>
<artifactId>classworlds</artifactId>
<type>jar</type>
<version>1.1-alpha-2</version>
</dependency>
<dependency>
<groupId>org.kuali.maven.plugins</groupId>
<artifactId>properties-maven-plugin</artifactId>
<type>jar</type>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<type>jar</type>
<version>3.1.1.RELEASE</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-asm</artifactId>
<type>jar</type>
<version>3.1.1.RELEASE</version>
</dependency>
<dependency>
<groupId>org.jasypt</groupId>
<artifactId>jasypt</artifactId>
<type>jar</type>
<version>1.9.0</version>
</dependency>
<dependency>
<groupId>org.kuali.maven.common</groupId>
<artifactId>maven-kuali-common</artifactId>
<type>jar</type>
<version>1.2.8</version>
</dependency>
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
<type>jar</type>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant-launcher</artifactId>
<type>jar</type>
<version>1.8.2</version>
</dependency>
<dependency>
<groupId>org.codehaus.plexus</groupId>
<artifactId>plexus-interpolation</artifactId>
<type>jar</type>
<version>1.15</version>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<type>jar</type>
<version>2.6</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<type>jar</type>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<type>jar</type>
<version>1.6.4</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<type>jar</type>
<version>1.7.22</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<type>jar</type>
<version>1.7.22</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<type>jar</type>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<type>jar</type>
<version>3.1.0</version>
</dependency>
</dependencies>
</plugin>

View File

@ -0,0 +1,2 @@
eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.class
eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.class

View File

@ -0,0 +1,2 @@
/Users/claudio/workspace/dnet-hadoop/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/GenerateOoziePropertiesMojo.java
/Users/claudio/workspace/dnet-hadoop/dhp-build/dhp-build-properties-maven-plugin/src/main/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectProperties.java

16
dhp-build/pom.xml Normal file
View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Aggregator POM (packaging "pom") grouping the build-support modules listed under <modules>. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-build</artifactId>
<packaging>pom</packaging>
<modules>
<module>dhp-build-assembly-resources</module>
<module>dhp-build-properties-maven-plugin</module>
</modules>
</project>

177
dhp-common/pom.xml Normal file
View File

@ -0,0 +1,177 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-common</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.oozie</groupId>
<artifactId>oozie-core</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
<classifier>hadoop2</classifier>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<!-- required by caching mechanism for setting chmod -->
<groupId>org.springframework</groupId>
<artifactId>spring-beans</artifactId>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pig</groupId>
<artifactId>pig</artifactId>
</dependency>
<dependency>
<groupId>com.linkedin.datafu</groupId>
<artifactId>datafu</artifactId>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>org.jdom</groupId>
<artifactId>jdom</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
<!-- Plugin that generates Java classes from Avro schemas -->
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<executions>
<execution>
<phase>generate-test-sources</phase>
<goals>
<goal>schema</goal>
<goal>idl-protocol</goal>
</goals>
<configuration>
<stringType>String</stringType>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-test-sources</id>
<phase>generate-test-sources</phase>
<goals>
<goal>add-test-source</goal>
</goals>
<configuration>
<sources>
<source>${project.build.directory}/generated-test-sources/avro/</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
<!-- Plugin that generates jar with test classes -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<!-- Tests belonging to the group below are excluded from the surefire run. -->
<!-- NOTE(review): the group is named in package eu.dnetlib.iis.common while this project's groupId is eu.dnetlib.dhp; confirm the marker class exists under this name. -->
<excludedGroups>eu.dnetlib.iis.common.IntegrationTest</excludedGroups>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,106 @@
package eu.dnetlib.dhp.common;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.LinkedList;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsShell;
import org.springframework.beans.BeanUtils;
import org.springframework.util.ClassUtils;
import org.springframework.util.ReflectionUtils;
/**
 * Extracted from:
 * https://github.com/spring-projects/spring-hadoop/blob/master/spring-hadoop-core/src/main/java/org/springframework/data/hadoop/fs/FsShellPermissions.java
 *
 * Utility class for accessing Hadoop FsShellPermissions (which is not public)
 * without having to duplicate its code.
 * <p>
 * The Hadoop implementation is located and invoked reflectively; which code path is
 * used depends on whether <code>org.apache.hadoop.fs.FsShellPermissions$Chmod</code>
 * is present on the classpath.
 * @author Costin Leau
 *
 */
public class FsShellPermissions {

    private static final String HADOOP_2X_COMMAND_PREFIX = "org.apache.hadoop.fs.FsShellPermissions$";

    /** True when the Hadoop 2.x style nested command class is present on the classpath. */
    private static final boolean IS_HADOOP_20X = ClassUtils.isPresent(HADOOP_2X_COMMAND_PREFIX + "Chmod",
            FsShellPermissions.class.getClassLoader());

    /**
     * Supported permission/ownership operations along with their shell command switch.
     */
    public enum Op {
        CHOWN("-chown"), CHMOD("-chmod"), CHGRP("-chgrp");

        private final String cmd;

        Op(String cmd) {
            this.cmd = cmd;
        }

        public String getCmd() {
            return cmd;
        }
    }

    /**
     * Concatenates the given arrays, in order, into a single new array.
     *
     * @param first leading array; the result is created as an extended copy of it
     * @param rest arrays appended after <code>first</code>
     * @return new array holding all elements of <code>first</code> followed by the elements of each <code>rest</code> array
     */
    // TODO: move this into Spring Core (but add JDK 1.5 compatibility first)
    @SafeVarargs
    static <T> T[] concatAll(T[] first, T[]... rest) {
        // can add some sanity checks
        int totalLength = first.length;
        for (T[] array : rest) {
            totalLength += array.length;
        }
        T[] result = Arrays.copyOf(first, totalLength);
        int offset = first.length;
        for (T[] array : rest) {
            System.arraycopy(array, 0, result, offset, array.length);
            offset += array.length;
        }
        return result;
    }

    /**
     * Single-uri variant of {@link #changePermissions(FileSystem, Configuration, Op, boolean, String, String...)}.
     */
    public static void changePermissions(FileSystem fs, Configuration config,
            Op op, boolean recursive, String group, String uri) {
        changePermissions(fs, config, op, recursive, group, new String[] {uri});
    }

    /**
     * Runs the requested operation by reflectively invoking the Hadoop shell implementation.
     * The command arguments are built as: optional <code>-R</code>, then <code>group</code>, then all uris.
     *
     * @param fs file system handed over to the Hadoop 1.0.x implementation
     * @param config hadoop configuration; its class loader is used for resolving Hadoop classes
     * @param op operation to be performed
     * @param recursive when true, <code>-R</code> is prepended to the command arguments
     * @param group first command argument following the optional <code>-R</code> switch
     * @param uris remaining command arguments
     * @throws RuntimeException when the Hadoop 2.x invocation ends with {@link IllegalStateException}
     */
    public static void changePermissions(FileSystem fs, Configuration config,
            Op op, boolean recursive, String group, String... uris) {
        String[] argvs;
        if (recursive) {
            argvs = new String[1];
            argvs[0] = "-R";
        } else {
            argvs = new String[0];
        }
        argvs = concatAll(argvs, new String[] { group }, uris);

        // Hadoop 1.0.x
        if (!IS_HADOOP_20X) {
            Class<?> cls = ClassUtils.resolveClassName("org.apache.hadoop.fs.FsShellPermissions", config.getClass().getClassLoader());
            Object[] args = new Object[] { fs, op.getCmd(), argvs, 0, new FsShell(config) };

            Method m = ReflectionUtils.findMethod(cls, "changePermissions", FileSystem.class, String.class, String[].class, int.class, FsShell.class);
            ReflectionUtils.makeAccessible(m);
            ReflectionUtils.invokeMethod(m, null, args);
        }
        // Hadoop 2.x
        else {
            Class<?> cmd = ClassUtils.resolveClassName("org.apache.hadoop.fs.shell.Command", config.getClass().getClassLoader());
            // pick the command class matching the requested operation (previously Chmod was always used)
            Class<?> targetClz = ClassUtils.resolveClassName(hadoop2CommandClassName(op), config.getClass().getClassLoader());
            Configurable target = (Configurable) BeanUtils.instantiate(targetClz);
            target.setConf(config);
            // run(String...) swallows the exceptions - re-implement it here
            //
            LinkedList<String> args = new LinkedList<String>(Arrays.asList(argvs));
            try {
                Method m = ReflectionUtils.findMethod(cmd, "processOptions", LinkedList.class);
                ReflectionUtils.makeAccessible(m);
                ReflectionUtils.invokeMethod(m, target, args);
                m = ReflectionUtils.findMethod(cmd, "processRawArguments", LinkedList.class);
                ReflectionUtils.makeAccessible(m);
                ReflectionUtils.invokeMethod(m, target, args);
            } catch (IllegalStateException ex){
                // keep the original message, but preserve the cause for diagnostics
                throw new RuntimeException("Cannot change permissions/ownership " + ex, ex);
            }
        }
    }

    /**
     * Provides name of the Hadoop 2.x nested command class handling given operation.
     * Falls back to the Chmod command for any other value, which was the only command used before.
     */
    private static String hadoop2CommandClassName(Op op) {
        if (op == Op.CHOWN) {
            return HADOOP_2X_COMMAND_PREFIX + "Chown";
        }
        if (op == Op.CHGRP) {
            return HADOOP_2X_COMMAND_PREFIX + "Chgrp";
        }
        return HADOOP_2X_COMMAND_PREFIX + "Chmod";
    }
}

View File

@ -0,0 +1,75 @@
package eu.dnetlib.dhp.common;
import java.io.UnsupportedEncodingException;
/**
 * InfoSpaceConstants constants.
 * <p>
 * Holds separators, row prefixes, semantic class/scheme identifiers and related
 * literals. The class is not instantiable.
 *
 * @author mhorst
 *
 */
public final class InfoSpaceConstants {

    public static final float CONFIDENCE_TO_TRUST_LEVEL_FACTOR = 0.9f;

    public static final String ENCODING_UTF8 = "utf-8";

    public static final char ROW_PREFIX_SEPARATOR = '|';

    public static final String ID_NAMESPACE_SEPARATOR = "::";
    public static final String CLASSIFICATION_HIERARCHY_SEPARATOR = ID_NAMESPACE_SEPARATOR;
    public static final String INFERENCE_PROVENANCE_SEPARATOR = ID_NAMESPACE_SEPARATOR;

    public static final String ROW_PREFIX_RESULT = "50|";
    public static final String ROW_PREFIX_PROJECT = "40|";
    public static final String ROW_PREFIX_PERSON = "30|";
    public static final String ROW_PREFIX_ORGANIZATION = "20|";
    public static final String ROW_PREFIX_DATASOURCE = "10|";

    public static final String QUALIFIER_BODY_STRING = "body";

    /**
     * UTF-8 bytes of {@link #QUALIFIER_BODY_STRING}.
     * The array is shared and mutable: callers must not modify its content.
     */
    // StandardCharsets.UTF_8 yields the same bytes as the "utf-8" name lookup,
    // without the checked UnsupportedEncodingException that can never occur.
    public static final byte[] QUALIFIER_BODY =
            QUALIFIER_BODY_STRING.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    public static final String SEMANTIC_CLASS_MAIN_TITLE = "main title";
    public static final String SEMANTIC_CLASS_PUBLICATION = "publication";
    public static final String SEMANTIC_CLASS_UNKNOWN = "UNKNOWN";

    public static final String SEMANTIC_SCHEME_DNET_PERSON_ROLES = "dnet:personroles";
    public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT = "dnet:result_result_relations";
    public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_PROJECT = "dnet:result_project_relations";

    public static final String SEMANTIC_SCHEME_DNET_TITLE = "dnet:dataCite_title";
    public static final String SEMANTIC_SCHEME_DNET_TITLE_TYPOLOGIES = "dnet:title_typologies";
    public static final String SEMANTIC_SCHEME_DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
    public static final String SEMANTIC_SCHEME_DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
    public static final String SEMANTIC_SCHEME_DNET_LANGUAGES = "dnet:languages";
    public static final String SEMANTIC_SCHEME_DNET_PID_TYPES = "dnet:pid_types";
    public static final String SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES = "dnet:subject_classification_typologies";

    // resultResult citation and similarity related
    public static final String SEMANTIC_SCHEME_DNET_DATASET_PUBLICATION_RELS = "dnet:dataset_publication_rels";

    public static final String SEMANTIC_CLASS_TAXONOMIES_ARXIV = "arxiv";
    public static final String SEMANTIC_CLASS_TAXONOMIES_WOS = "wos";
    public static final String SEMANTIC_CLASS_TAXONOMIES_DDC = "ddc";
    public static final String SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC = "mesheuropmc";
    public static final String SEMANTIC_CLASS_TAXONOMIES_ACM = "acm";

    public static final String EXTERNAL_ID_TYPE_INSTANCE_URL = "dnet:instance-url";
    public static final String EXTERNAL_ID_TYPE_UNKNOWN = "unknown";

    // publication types class ids
    public static final String SEMANTIC_CLASS_INSTANCE_TYPE_ARTICLE = "0001";
    public static final String SEMANTIC_CLASS_INSTANCE_TYPE_DATASET = "0021";

    private InfoSpaceConstants() {
    }
}

View File

@ -0,0 +1,74 @@
package eu.dnetlib.dhp.common;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
/**
 * Parameter names, default values and static accessors simplifying retrieval of
 * parameters from hadoop context configuration.
 *
 * @author mhorst
 *
 */
public final class WorkflowRuntimeParameters {

    public static final String OOZIE_ACTION_OUTPUT_FILENAME = "oozie.action.output.properties";

    public static final char DEFAULT_CSV_DELIMITER = ',';

    public static final String UNDEFINED_NONEMPTY_VALUE = "$UNDEFINED$";

    // default values
    public static final String DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE = "60000";
    public static final String DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000";

    // parameter names
    public static final String DNET_SERVICE_CLIENT_READ_TIMEOUT = "dnet.service.client.read.timeout";
    public static final String DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT = "dnet.service.client.connection.timeout";

    // ----------------- CONSTRUCTORS -----------------------------

    private WorkflowRuntimeParameters() {}

    /**
     * Reads parameter from hadoop context configuration.
     *
     * @param paramName name of the parameter to be read
     * @param configuration hadoop configuration holding parameters
     * @return parameter value, or null when the value is blank or equal to {@link WorkflowRuntimeParameters#UNDEFINED_NONEMPTY_VALUE}
     */
    public static String getParamValue(String paramName, Configuration configuration) {
        final String rawValue = configuration.get(paramName);
        if (StringUtils.isBlank(rawValue)) {
            return null;
        }
        if (UNDEFINED_NONEMPTY_VALUE.equals(rawValue)) {
            return null;
        }
        return rawValue;
    }

    /**
     * Reads {@link Integer} parameter from hadoop context configuration, following the rules
     * of {@link #getParamValue(String, Configuration)}.
     *
     * @param paramName name of the parameter to be read
     * @param configuration hadoop configuration holding parameters
     * @return parsed value, or null when {@link #getParamValue(String, Configuration)} yields null
     * @throws NumberFormatException if parameter value does not contain a parsable integer
     */
    public static Integer getIntegerParamValue(String paramName, Configuration configuration) throws NumberFormatException {
        final String textValue = getParamValue(paramName, configuration);
        if (textValue == null) {
            return null;
        }
        return Integer.valueOf(textValue);
    }

    /**
     * Reads parameter from hadoop context configuration, following the rules
     * of {@link #getParamValue(String, Configuration)}. When that yields null,
     * the fallback parameter is read using the same rules.
     *
     * @param paramName name of the primary parameter
     * @param fallbackParamName name of the parameter consulted when the primary one yields null
     * @param configuration hadoop configuration holding parameters
     */
    public static String getParamValue(String paramName, String fallbackParamName, Configuration configuration) {
        final String primary = getParamValue(paramName, configuration);
        if (primary != null) {
            return primary;
        }
        return getParamValue(fallbackParamName, configuration);
    }

    /**
     * Provides parameter value. Returns default value when entry not found among parameters.
     *
     * @param paramName parameter name
     * @param defaultValue parameter default value to be returned when entry not found among parameters
     * @param parameters map of parameters
     */
    public static String getParamValue(String paramName, String defaultValue, Map<String, String> parameters) {
        if (parameters.containsKey(paramName)) {
            return parameters.get(paramName);
        }
        return defaultValue;
    }
}

View File

@ -0,0 +1,111 @@
package eu.dnetlib.dhp.common.counter;
import java.io.Serializable;
import java.util.Collection;
import java.util.Map;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
/**
* Class that groups several counters which are identified by name (<code>String</code> value).
*
* @author madryk
*/
public class NamedCounters implements Serializable {
private static final long serialVersionUID = 1L;
private final Map<String, Long> counters;
//------------------------ CONSTRUCTORS --------------------------
/**
* Creates {@link NamedCounters} with empty initial counters.
*/
public NamedCounters() {
this.counters = Maps.newHashMap();
}
/**
* Creates {@link NamedCounters} with initial counters.<br/>
* Starting value of initial counters is zero.
*
* @param initialCounterNames - names of initial counters
*/
public NamedCounters(String[] initialCounterNames) {
Preconditions.checkNotNull(initialCounterNames);
this.counters = Maps.newHashMap();
for (String initialCounterName : initialCounterNames) {
this.counters.put(initialCounterName, 0L);
}
}
/**
* Creates {@link NamedCounters} with initial counters.<br/>
* Starting value of initial counters is zero.
*
* @param initialCounterNamesEnumClass - enum class providing names of initial counters
*/
public <E extends Enum<E>> NamedCounters(Class<E> initialCounterNamesEnumClass) {
Preconditions.checkNotNull(initialCounterNamesEnumClass);
this.counters = Maps.newHashMap();
Enum<?>[] enumConstants = initialCounterNamesEnumClass.getEnumConstants();
for (int i=0; i<enumConstants.length; ++i) {
this.counters.put(enumConstants[i].name(), 0L);
}
}
//------------------------ LOGIC --------------------------
/**
* Increments value by 1 of a counter with the name specified as parameter.<br/>
* Internally uses {@link #increment(String, Long)}
*/
public void increment(String counterName) {
increment(counterName, 1L);
}
/**
* Increments value of a counter with the name specified as parameter by the given value.<br/>
* If current instance of {@link NamedCounters} does not contain counter
* with provided name, then before incrementing counter will be created with starting
* value equal to zero.
*/
public void increment(String counterName, Long incrementValue) {
long oldValue = counters.getOrDefault(counterName, 0L);
counters.put(counterName, oldValue + incrementValue);
}
/**
 * Looks up the present value of the counter with the given name.
 *
 * @param counterName name of the counter to read
 * @return current value of that counter
 * @throws IllegalArgumentException when this {@link NamedCounters} does not track
 *      a counter with the given name
 */
public long currentValue(String counterName) {
    if (counters.containsKey(counterName)) {
        return counters.get(counterName);
    }
    throw new IllegalArgumentException("Couldn't find counter with name: " + counterName);
}
/**
 * Returns the names of all counters tracked at the moment.<br/>
 * The key set of the internal counters map is returned directly (no copy is made).
 */
public Collection<String> counterNames() {
    final Collection<String> names = counters.keySet();
    return names;
}
}

View File

@ -0,0 +1,48 @@
package eu.dnetlib.dhp.common.counter;
import org.apache.spark.AccumulableParam;
import scala.Tuple2;
/**
 * Spark {@link AccumulableParam} that accumulates several named counters at once,
 * using {@link NamedCounters} as the accumulated value.
 *
 * @author madryk
 */
public class NamedCountersAccumulableParam implements AccumulableParam<NamedCounters, Tuple2<String,Long>> {

    private static final long serialVersionUID = 1L;

    //------------------------ LOGIC --------------------------

    /**
     * Applies a single increment to the passed {@link NamedCounters}: the first tuple element
     * names the counter, the second one is the amount to add.
     *
     * @return the same {@link NamedCounters} instance that was passed in, after the increment
     */
    @Override
    public NamedCounters addAccumulator(NamedCounters counters, Tuple2<String, Long> incrementValue) {
        String counterName = incrementValue._1();
        Long delta = incrementValue._2();
        counters.increment(counterName, delta);
        return counters;
    }

    /**
     * Merges two {@link NamedCounters}: every counter of the second argument is added
     * to the counter with the same name in the first argument.
     *
     * @return the first argument, after the merge
     */
    @Override
    public NamedCounters addInPlace(NamedCounters counters1, NamedCounters counters2) {
        for (String name : counters2.counterNames()) {
            long valueToAdd = counters2.currentValue(name);
            counters1.increment(name, valueToAdd);
        }
        return counters1;
    }

    /**
     * Returns the passed initialCounters object untouched.
     */
    @Override
    public NamedCounters zero(NamedCounters initialCounters) {
        return initialCounters;
    }
}

View File

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.common.counter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Properties;
/**
 * Stores a {@link NamedCounters} object as a Java properties file.
 *
 * @author madryk
 */
public class NamedCountersFileWriter {

    //------------------------ LOGIC --------------------------

    /**
     * Writes all counters as {@code name=value} properties into the file
     * located under the provided filePath.
     *
     * @param counters counters to be stored
     * @param filePath location of the properties file to write
     * @throws IOException if writing to properties file resulted in an error
     */
    public void writeCounters(NamedCounters counters, String filePath) throws IOException {
        Properties counterProperties = buildPropertiesFromCounters(counters);
        try (OutputStream os = new FileOutputStream(new File(filePath))) {
            // no header comment is written to the file
            counterProperties.store(os, null);
        }
    }

    //------------------------ PRIVATE --------------------------

    /**
     * Converts counters into properties: the key is the counter name,
     * the value is the decimal representation of the counter value.
     */
    private Properties buildPropertiesFromCounters(NamedCounters counters) {
        Properties properties = new Properties();
        for (String counterName : counters.counterNames()) {
            properties.setProperty(counterName, Long.toString(counters.currentValue(counterName)));
        }
        return properties;
    }
}

View File

@ -0,0 +1,67 @@
package eu.dnetlib.dhp.common.fault;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import eu.dnetlib.dhp.audit.schemas.Cause;
import eu.dnetlib.dhp.audit.schemas.Fault;
/**
 * Helper methods for building {@link Fault} records.
 * @author mhorst
 *
 */
public final class FaultUtils {

    // ---------------------- CONSTRUCTORS -------------------

    private FaultUtils() {}

    // ---------------------- LOGIC --------------------------

    /**
     * Converts a {@link Throwable} into a {@link Fault} record.
     * The fault carries the current timestamp, the throwable's class name as code,
     * its message, its printed stack trace and, when present, the chain of causes.
     *
     * @param entityId entity identifier, stored as the input object id of the fault
     * @param throwable throwable to be described by the fault
     * @param auditSupplementaryData optional supplementary data; set on the fault only
     *      when it is neither null nor empty
     * @return {@link Fault} instance generated for {@link Throwable}
     */
    public static Fault exceptionToFault(CharSequence entityId, Throwable throwable,
            Map<CharSequence, CharSequence> auditSupplementaryData) {
        Fault.Builder faultBuilder = Fault.newBuilder();
        faultBuilder.setInputObjectId(entityId);
        faultBuilder.setTimestamp(System.currentTimeMillis());
        faultBuilder.setCode(throwable.getClass().getName());
        faultBuilder.setMessage(throwable.getMessage());
        faultBuilder.setStackTrace(renderStackTrace(throwable));

        Throwable directCause = throwable.getCause();
        if (directCause != null) {
            faultBuilder.setCauses(appendThrowableToCauses(directCause, new ArrayList<Cause>()));
        }

        boolean hasSupplementaryData = auditSupplementaryData != null && !auditSupplementaryData.isEmpty();
        if (hasSupplementaryData) {
            faultBuilder.setSupplementaryData(auditSupplementaryData);
        }
        return faultBuilder.build();
    }

    /**
     * Appends a {@link Cause} for the given throwable and then for each of its
     * nested causes, walking the cause chain from the outermost to the innermost.
     *
     * @param e first throwable of the chain to be appended
     * @param causes list receiving the generated causes
     * @return the list passed as {@code causes}
     */
    protected static List<Cause> appendThrowableToCauses(Throwable e, List<Cause> causes) {
        Throwable current = e;
        do {
            Cause.Builder causeBuilder = Cause.newBuilder();
            causeBuilder.setCode(current.getClass().getName());
            causeBuilder.setMessage(current.getMessage());
            causes.add(causeBuilder.build());
            current = current.getCause();
        } while (current != null);
        return causes;
    }

    // ---------------------- PRIVATE ------------------------

    /** Prints the stack trace of the throwable into a string. */
    private static String renderStackTrace(Throwable throwable) {
        StringWriter buffer = new StringWriter();
        try (PrintWriter printer = new PrintWriter(buffer)) {
            throwable.printStackTrace(printer);
        }
        return buffer.toString();
    }
}

View File

@ -0,0 +1,98 @@
package eu.dnetlib.dhp.common.java;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
/**
 * Parser of the command line arguments passed to a Java workflow node.
 * Defines the set of recognized options (constructor parameters, input and
 * output port bindings, special parameters and process parameters).
 *
 * @author Mateusz Kobos
 *
 */
@SuppressWarnings("deprecation")
public final class CmdLineParser {

    /** HACK: make the names of various types of parameters of the program
     * more readable, e.g. "--Input_person=..." instead of "-Iperson=...",
     * "--Output_merged=..." instead of "-Omerged=...". I wasn't able to
     * get such notation so far using the Apache CLI. */
    public static final String constructorPrefix = "C";
    public static final String inputPrefix = "I";
    public static final String outputPrefix = "O";
    public static final String specialParametersPrefix = "S";
    /** HACK: This field should be removed since this list of special
     * parameters is empty, thus not used anywhere.*/
    public static final String[] mandatorySpecialParameters = new String[]{};
    public static final String processParametersPrefix = "P";

    /**
     * Usage line printed by the help option. It must not be empty:
     * {@link HelpFormatter#printHelp(String, Options)} rejects an empty
     * syntax string with an {@link IllegalArgumentException}.
     */
    private static final String CMD_LINE_SYNTAX = "<process_class_name> [options]";

    // ------------------------- CONSTRUCTORS ------------------------------

    private CmdLineParser() {}

    // ------------------------- LOGIC -------------------------------------

    /**
     * Parses raw command line arguments.
     * When the {@code help} option is present, the usage message is printed
     * and the JVM is terminated with exit status 1.
     *
     * @param args raw command line arguments
     * @return parsed command line
     * @throws CmdLineParserException when the arguments cannot be parsed
     */
    public static CommandLine parse(String[] args) {
        Options options = new Options();

        @SuppressWarnings("static-access")
        Option constructorParams = OptionBuilder.withArgName("STRING")
                .hasArg()
                .withDescription("Constructor parameter")
                .withLongOpt("ConstructorParam")
                .create(constructorPrefix);
        options.addOption(constructorParams);

        @SuppressWarnings("static-access")
        Option inputs = OptionBuilder.withArgName("portName=URI")
                .hasArgs(2)
                .withValueSeparator()
                .withDescription("Path binding for a given input port")
                .withLongOpt("Input")
                .create(inputPrefix);
        options.addOption(inputs);

        @SuppressWarnings("static-access")
        Option outputs = OptionBuilder.withArgName("portName=URI")
                .hasArgs(2)
                .withValueSeparator()
                .withDescription("Path binding for a given output port")
                .create(outputPrefix);
        options.addOption(outputs);

        @SuppressWarnings("static-access")
        Option specialParameter = OptionBuilder.withArgName("parameter_name=string")
                .hasArgs(2)
                .withValueSeparator()
                .withDescription(String.format("Value of special parameter. "
                        + "These are the mandatory parameters={%s}",
                        StringUtils.join(mandatorySpecialParameters, ",")))
                .create(specialParametersPrefix);
        options.addOption(specialParameter);

        @SuppressWarnings("static-access")
        Option otherParameter = OptionBuilder.withArgName("parameter_name=string")
                .hasArgs(2)
                .withValueSeparator()
                .withDescription("Value of some other parameter.")
                .create(processParametersPrefix);
        options.addOption(otherParameter);

        Option help = new Option("help", "print this message");
        options.addOption(help);

        CommandLineParser parser = new GnuParser();
        try {
            CommandLine cmdLine = parser.parse(options, args);
            if(cmdLine.hasOption("help")){
                HelpFormatter formatter = new HelpFormatter();
                // an empty syntax string would make printHelp throw instead of printing
                formatter.printHelp(CMD_LINE_SYNTAX, options);
                System.exit(1);
            }
            return cmdLine;
        } catch (ParseException e) {
            throw new CmdLineParserException("Parsing command line arguments failed", e);
        }
    }
}

View File

@ -0,0 +1,21 @@
package eu.dnetlib.dhp.common.java;
/**
 * Unchecked exception signalling a problem with the command line arguments.
 * @author Mateusz Kobos
 *
 */
public class CmdLineParserException extends RuntimeException {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 9219928547611876284L;

    /**
     * @param message description of the problem
     */
    public CmdLineParserException(final String message){
        super(message);
    }

    /**
     * @param message description of the problem
     * @param cause underlying exception that triggered this one
     */
    public CmdLineParserException(final String message, final Throwable cause){
        super(message, cause);
    }
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.common.java;
import java.lang.reflect.Constructor;
import org.apache.commons.cli.CommandLine;
/**
 * Handles parsing the command line arguments provided by the Oozie
 * to create a {@link Process}
 * @author Mateusz Kobos
 *
 */
public class CmdLineParserForProcessConstruction {

    /**
     * Instantiates the {@link Process} class named by the single positional
     * command line argument.
     *
     * When no constructor parameters were given, the no-argument constructor
     * is tried first; if the class does not declare one (or parameters were
     * given), the constructor taking a single {@code String[]} is used.
     *
     * @param cmdLine parsed command line
     * @return newly created process instance
     * @throws CmdLineParserException when the class name is not given as the
     *      only positional argument or when the class cannot be instantiated
     */
    public Process run(CommandLine cmdLine){
        String[] args = cmdLine.getArgs();
        if(args.length != 1){
            throw new CmdLineParserException("The name of the class has "+
                    "to be specified as the first argument");
        }
        String className = args[0];

        String[] constructorParams = cmdLine.getOptionValues(
                CmdLineParser.constructorPrefix);
        if(constructorParams == null){
            constructorParams = new String[0];
        }

        try {
            Class<?> processClass = Class.forName(className);
            if(constructorParams.length == 0){
                try{
                    Constructor<?> noArgConstructor = processClass.getConstructor();
                    return (Process) noArgConstructor.newInstance();
                } catch(NoSuchMethodException ignored){
                    // Deliberately ignored: the class has no public no-argument
                    // constructor, so fall through to the String[] constructor.
                }
            }
            Constructor<?> paramsConstructor = processClass.getConstructor(String[].class);
            // the cast prevents the array from being expanded into varargs
            return (Process) paramsConstructor.newInstance(
                    (Object)constructorParams);
        } catch (Exception e) {
            throw new CmdLineParserException(String.format(
                    "Problem while creating class \"%s\"", className), e);
        }
    }
}

View File

@ -0,0 +1,100 @@
package eu.dnetlib.dhp.common.java;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.hadoop.fs.Path;
/**
 * Handles parsing parameters passed to the {@link Process}
 * @author Mateusz Kobos
 *
 */
public class CmdLineParserForProcessRunParameters {

    /** Parse the command line arguments.
     *
     * @param cmdLine command line arguments
     * @param ports names of ports that ought to be extracted from command line
     * @return port bindings and process parameters read from the command line
     * @throws CmdLineParserException when the ports given in the command line
     *      do not match exactly the expected ports or when a mandatory special
     *      parameter is missing
     */
    public ProcessParameters run(CommandLine cmdLine, Ports ports) {

        Properties inputProperties = cmdLine.getOptionProperties(
                CmdLineParser.inputPrefix);
        assumePortNamesMatch(CmdLineParser.inputPrefix, inputProperties,
                ports.getInput().keySet());
        Map<String, Path> inputBindings = getBindings(
                inputProperties, ports.getInput().keySet());

        Properties outputProperties = cmdLine.getOptionProperties(
                CmdLineParser.outputPrefix);
        assumePortNamesMatch(CmdLineParser.outputPrefix, outputProperties,
                ports.getOutput().keySet());
        Map<String, Path> outputBindings = getBindings(
                outputProperties, ports.getOutput().keySet());

        PortBindings bindings = new PortBindings(inputBindings, outputBindings);

        // special parameters are only validated here, they are not passed on
        Properties specialProperties = cmdLine.getOptionProperties(
                CmdLineParser.specialParametersPrefix);
        assumeContainAllMandatoryParameters(
                specialProperties, CmdLineParser.mandatorySpecialParameters);

        Properties rawProperties = cmdLine.getOptionProperties(
                CmdLineParser.processParametersPrefix);
        Map<String, String> processParameters = new HashMap<String, String>();
        for(Entry<Object, Object> entry: rawProperties.entrySet()){
            processParameters.put(
                    (String)entry.getKey(), (String)entry.getValue());
        }

        return new ProcessParameters(bindings, processParameters);
    }

    /**
     * Checks that every mandatory parameter is present among the properties.
     *
     * @throws CmdLineParserException when a mandatory parameter is missing
     */
    private static void assumeContainAllMandatoryParameters(
            Properties properties, String[] mandatoryParameters){
        for(String otherParameter: mandatoryParameters){
            if(!properties.containsKey(otherParameter)){
                // the reported option includes the prefix, the same way
                // the port-related messages below do
                throw new CmdLineParserException(String.format(
                        "Not all mandatory properties set using the \"%s\" "
                        + "option are given, e.g. \"-%s\" parameter is missing",
                        CmdLineParser.specialParametersPrefix,
                        CmdLineParser.specialParametersPrefix + otherParameter));
            }
        }
    }

    /**
     * Checks that the set of port names given in the command line is exactly
     * the expected set of port names.
     *
     * @throws CmdLineParserException when an expected port is missing in the
     *      command line or when the command line names an unexpected port
     */
    private static void assumePortNamesMatch(String cmdLineParamPrefix,
            Properties cmdLineProperties, Set<String> portNames) {
        for (String name : portNames) {
            if (!cmdLineProperties.containsKey(name)) {
                throw new CmdLineParserException(String.format(
                        "The port with name \"%s\" is not specified in "
                        + "command line (command line option \"-%s\" is missing)",
                        name, cmdLineParamPrefix + name));
            }
        }
        for (Object cmdLineKeyObject : cmdLineProperties.keySet()) {
            String name = (String) cmdLineKeyObject;
            if (!portNames.contains(name)) {
                throw new CmdLineParserException(String.format(
                        "A port name \"%s\" which is not specified is given "
                        + "in the command line "
                        + "(command line option \"-%s\" is excess)",
                        name, cmdLineParamPrefix + name));
            }
        }
    }

    /** Maps every port name to the path given for it in the command line. */
    private static Map<String, Path> getBindings(
            Properties cmdLineProperties, Set<String> portNames) {
        Map<String, Path> bindings = new HashMap<String, Path>();
        for (String name : portNames) {
            Path path = new Path((String) cmdLineProperties.get(name));
            bindings.put(name, path);
        }
        return bindings;
    }
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.common.java;
import java.util.Map;
import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.fs.Path;
/**
 * Port names (see {@link Ports}) bound to certain paths in the file system
 * @author Mateusz Kobos
 *
 */
public class PortBindings {

    private final Map<String, Path> input;
    private final Map<String, Path> output;

    /**
     * @param input input port name to path mappings
     * @param output output port name to path mappings
     */
    public PortBindings(Map<String, Path> input, Map<String, Path> output) {
        this.input = input;
        this.output = output;
    }

    /** @return input port name to path mappings */
    public Map<String, Path> getInput() {
        return input;
    }

    /** @return output port name to path mappings */
    public Map<String, Path> getOutput() {
        return output;
    }

    /**
     * Two bindings are equal when both their input and output mappings are equal.
     */
    @Override
    public boolean equals(Object o){
        if(!(o instanceof PortBindings)){
            return false;
        }
        PortBindings other = (PortBindings) o;
        return input.equals(other.input) && output.equals(other.output);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: derived from the
     * input and output mappings only.
     */
    @Override
    public int hashCode(){
        return 31 * input.hashCode() + output.hashCode();
    }
}

View File

@ -0,0 +1,27 @@
package eu.dnetlib.dhp.common.java;
import java.util.Map;
import eu.dnetlib.dhp.common.java.porttype.PortType;
/**
 * Groups the declarations of input and output ports, i.e. the
 * (name of the port -> type of the port) mappings.
 * @author Mateusz Kobos
 */
public class Ports {

    private final Map<String, PortType> input;
    private final Map<String, PortType> output;

    /**
     * @param input input port name to port type mappings
     * @param output output port name to port type mappings
     */
    public Ports(final Map<String, PortType> input, final Map<String, PortType> output){
        this.input = input;
        this.output = output;
    }

    /** @return input port name to port type mappings */
    public Map<String, PortType> getInput() {
        return this.input;
    }

    /** @return output port name to port type mappings */
    public Map<String, PortType> getOutput() {
        return this.output;
    }
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.common.java;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import eu.dnetlib.dhp.common.java.porttype.PortType;
/** Workflow node written in Java.
*
* The implementing class has to define a constructor with no parameters
* (possibly the default one) or a constructor with String[] as a single
* parameter.
* @author Mateusz Kobos
*/
public interface Process {
/**
* Run the process.
*
* The process ends with a success status if no exception is thrown,
* otherwise it ends with an error status.
*
* @param portBindings file system paths bound to the names of the input
* and output ports of this process
* @param conf Hadoop configuration to be used by the process
* @param parameters parameters of the process. Each parameter
* corresponds to a single entry in the map, its name is the key, its
* value is the value.
* @throws Exception if thrown, it means that the process finished
* with an error status
*/
void run(PortBindings portBindings, Configuration conf,
Map<String, String> parameters) throws Exception;
/**
* Describes the input ports of this process.
*
* @return map containing as the key: name of the port, as the value: type
* of the port
*/
Map<String, PortType> getInputPorts();
/**
* Describes the output ports of this process.
*
* @return map containing as the key: name of the port, as the value: type
* of the port
*/
Map<String, PortType> getOutputPorts();
}

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.common.java;
/**
 * Unchecked exception signalling a failure of a process.
 * @author Dominika Tkaczyk
 *
 */
public class ProcessException extends RuntimeException {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 2758953138374438377L;

    /**
     * @param message description of the failure
     */
    public ProcessException(final String message){
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause underlying exception that triggered this one
     */
    public ProcessException(final String message, final Throwable cause){
        super(message, cause);
    }
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.common.java;
import java.util.Map;
import org.apache.commons.lang.NotImplementedException;
/**
 * Parameters of the Process retrieved from Oozie
 * @author Mateusz Kobos
 *
 */
public class ProcessParameters {

    private final PortBindings portBindings;
    private final Map<String, String> parameters;

    /** @return paths bound to the input and output ports */
    public PortBindings getPortBindings() {
        return portBindings;
    }

    /** @return process parameters as (name -> value) mappings */
    public Map<String, String> getParameters(){
        return parameters;
    }

    /**
     * @param portBindings paths bound to the input and output ports
     * @param parameters process parameters as (name -> value) mappings
     */
    public ProcessParameters(PortBindings portBindings,
            Map<String, String> parameters) {
        this.portBindings = portBindings;
        this.parameters = parameters;
    }

    /**
     * Two instances are equal when both their port bindings and their
     * parameters are equal.
     */
    @Override
    public boolean equals(Object o){
        if(!(o instanceof ProcessParameters)){
            return false;
        }
        ProcessParameters other = (ProcessParameters) o;
        return this.portBindings.equals(other.portBindings)
                && java.util.Objects.equals(this.parameters, other.parameters);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}. It is computed from
     * the input and output mappings of the port bindings (the fields compared
     * by {@link PortBindings#equals(Object)}) and from the parameters.
     */
    @Override
    public int hashCode(){
        int result = portBindings.getInput().hashCode();
        result = 31 * result + portBindings.getOutput().hashCode();
        result = 31 * result + java.util.Objects.hashCode(parameters);
        return result;
    }
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.dhp.common.java;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
/**
 * Utility methods related to {@link Process}.
 * @author mhorst
 *
 */
public final class ProcessUtils {

    // ------------- CONSTRUCTORS ----------------

    private ProcessUtils() {}

    // ------------- LOGIC -----------------------

    /**
     * Returns the value of a parameter, looked up first in the explicit
     * parameters and then, when not found there, in the Hadoop configuration.
     *
     * @param paramName name of the parameter
     * @param hadoopConf Hadoop configuration used as the fallback source, may be null
     * @param parameters explicit parameters, may be null or empty
     * @return parameter value or null when it is not defined in any of the sources
     */
    public static String getParameterValue(String paramName,
            Configuration hadoopConf,
            Map<String, String> parameters) {
        boolean noExplicitParameters = parameters == null || parameters.isEmpty();
        String explicitValue = noExplicitParameters ? null : parameters.get(paramName);
        if (explicitValue != null) {
            return explicitValue;
        }
        return hadoopConf == null ? null : hadoopConf.get(paramName);
    }
}

View File

@ -0,0 +1,88 @@
package eu.dnetlib.dhp.common.java;
import java.io.IOException;
import java.util.Map;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericContainer;
import org.apache.commons.cli.CommandLine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import eu.dnetlib.dhp.common.java.io.DataStore;
import eu.dnetlib.dhp.common.java.io.FileSystemPath;
import eu.dnetlib.dhp.common.java.porttype.AvroPortType;
import eu.dnetlib.dhp.common.java.porttype.PortType;
/**
 * Entry point that builds a {@link Process} object through reflection,
 * based on the command-line arguments, and runs it.
 * @author Mateusz Kobos
 *
 */
public class ProcessWrapper {

    /** @return configuration handed over to the executed process */
    public Configuration getConfiguration() throws Exception{
        return new Configuration();
    }

    public static void main(String[] args) throws Exception {
        new ProcessWrapper().run(args);
    }

    /**
     * Parses the arguments, instantiates the process, runs it and finally
     * makes sure that a data store exists for each output port.
     *
     * @param args raw command line arguments
     */
    public void run(String[] args) throws Exception{
        CommandLine cmdLine = CmdLineParser.parse(args);

        Process process = new CmdLineParserForProcessConstruction().run(cmdLine);
        Ports ports =
                new Ports(process.getInputPorts(), process.getOutputPorts());
        ProcessParameters params =
                new CmdLineParserForProcessRunParameters().run(cmdLine, ports);

        Configuration conf = getConfiguration();
        PortBindings portBindings = params.getPortBindings();
        process.run(portBindings, conf, params.getParameters());
        createOutputsIfDontExist(
                process.getOutputPorts(), params.getPortBindings().getOutput(),
                conf);
    }

    /**
     * For each output port whose path does not exist or is an empty
     * directory, creates a data store with the schema of the port
     * and no records.
     *
     * @throws RuntimeException when such a port is not of Avro type
     */
    private static void createOutputsIfDontExist(
            Map<String, PortType> outputPortsSpecification,
            Map<String, Path> outputPortBindings, Configuration conf) throws IOException{
        FileSystem fs = FileSystem.get(conf);
        for(Map.Entry<String, Path> binding: outputPortBindings.entrySet()){
            Path outputPath = binding.getValue();
            boolean hasContent = fs.exists(outputPath) && !isEmptyDirectory(fs, outputPath);
            if(hasContent){
                continue;
            }
            String portName = binding.getKey();
            PortType rawType = outputPortsSpecification.get(portName);
            if(!(rawType instanceof AvroPortType)){
                throw new RuntimeException("The port \""+binding.getKey()+
                        "\" is not of Avro type and only Avro types are supported");
            }
            AvroPortType avroType = (AvroPortType) rawType;
            DataFileWriter<GenericContainer> writer = DataStore.create(
                    new FileSystemPath(fs, outputPath), avroType.getSchema());
            writer.close();
        }
    }

    /**
     * @return true only when the path is a directory that directly contains
     *      no files
     */
    private static boolean isEmptyDirectory(FileSystem fs, Path path) throws IOException{
        if(fs.isDirectory(path)){
            RemoteIterator<LocatedFileStatus> files = fs.listFiles(path, false);
            // any listed file means that the directory is not empty
            return !files.hasNext();
        }
        return false;
    }
}

View File

@ -0,0 +1,156 @@
package eu.dnetlib.dhp.common.java.io;
import java.io.IOException;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.hadoop.fs.AvroFSInput;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;
/**
 * An abstraction over data store format which allows
 * iterating over records stored in the data store.
 * It handles the standard case of a data store that is a directory containing
 * many Avro files (but it can also read records from a single file).
 *
 * @author mhorst
 * @author Mateusz Kobos
 */
class AvroDataStoreReader<T> implements CloseableIterator<T> {

    /**
     * Ignore file starting with underscore. Such files are also ignored by
     * default by map-reduce jobs.
     */
    private static final Pattern whitelistPattern = Pattern.compile("^(?!_).*");

    /** Reader of the file being currently iterated, null when there are no more records. */
    private DataFileReader<T> currentReader;
    private RemoteIterator<LocatedFileStatus> fileIterator;
    private final FileSystemPath path;
    private final Schema readerSchema;

    /**
     * Here the schema used for reading the data store is set to be the same
     * as the one that was used to write it.
     */
    public AvroDataStoreReader(final FileSystemPath path)
            throws IOException {
        this(path, null);
    }

    /**
     * @param path path to the data store to be read
     * @param readerSchema the schema onto which the read data store will
     *      be projected
     */
    public AvroDataStoreReader(final FileSystemPath path, Schema readerSchema)
            throws IOException {
        this.path = path;
        this.readerSchema = readerSchema;
        fileIterator = path.getFileSystem().listFiles(path.getPath(), false);
        currentReader = getNextNonemptyReader();
    }

    /**
     * Advances the file iterator to the next valid file holding at least one
     * record.
     *
     * @return reader of that file or null when no such file is left
     */
    private DataFileReader<T> getNextNonemptyReader() throws IOException {
        while (fileIterator != null && fileIterator.hasNext()) {
            LocatedFileStatus currentFileStatus = fileIterator.next();
            if (isValidFile(currentFileStatus)) {
                FileSystemPath currPath = new FileSystemPath(
                        path.getFileSystem(), currentFileStatus.getPath());
                DataFileReader<T> reader =
                        getSingleFileReader(currPath, readerSchema);
                /** Check if the file contains at least one record */
                if(reader.hasNext()){
                    return reader;
                } else {
                    reader.close();
                }
            }
        }
        /** fallback */
        return null;
    }

    /**
     * Get a reader for the specified Avro file. A utility function.
     * The opened input stream is closed when the reader cannot be created.
     *
     * @param path path to the existing file
     * @param readerSchema optional reader schema. If you want to use the
     *      default option of using writer schema as the reader schema, pass the
     *      {@code null} value.
     * @throws IOException when the file cannot be opened or read; the message
     *      names the offending file
     */
    private static <T> DataFileReader<T> getSingleFileReader(
            FileSystemPath path, Schema readerSchema) throws IOException{
        try{
            SpecificDatumReader<T> datumReader = new SpecificDatumReader<T>();
            if(readerSchema != null){
                datumReader.setExpected(readerSchema);
            }
            long len = path.getFileSystem().getFileStatus(path.getPath()).getLen();
            FSDataInputStream inputStream = path.getFileSystem().open(path.getPath());
            try {
                return new DataFileReader<T>(
                        new AvroFSInput(inputStream, len), datumReader);
            } catch (IOException | RuntimeException ex) {
                // nobody owns the stream yet, so release it before propagating
                try {
                    inputStream.close();
                } catch (IOException closeEx) {
                    ex.addSuppressed(closeEx);
                }
                throw ex;
            }
        } catch (IOException ex){
            throw new IOException("Problem with file \""+
                    path.getPath().toString()+"\": "+ex.getMessage(), ex);
        }
    }

    /**
     * Checks whether file is valid, i.e. it is a regular file whose name
     * does not start with an underscore.
     *
     * @param fileStatus status of the file to be checked
     * @return true when valid, false otherwise
     */
    private boolean isValidFile(LocatedFileStatus fileStatus) {
        if (fileStatus.isFile()) {
            return whitelistPattern.matcher(
                    fileStatus.getPath().getName()).matches();
        }
        /** fallback */
        return false;
    }

    @Override
    public boolean hasNext() {
        return currentReader != null;
    }

    /**
     * Returns the next record. When the current file gets exhausted, its
     * reader is closed and the next non-empty file is opened.
     *
     * @throws NoSuchElementException when there are no more records
     * @throws RuntimeException wrapping an {@link IOException} raised while
     *      switching to the next file
     */
    @Override
    public T next(){
        if(currentReader == null){
            throw new NoSuchElementException();
        }
        T obj = currentReader.next();
        if(!currentReader.hasNext()){
            try{
                currentReader.close();
                currentReader = getNextNonemptyReader();
            } catch(IOException ex){
                throw new RuntimeException(ex);
            }
        }
        return obj;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Closes the reader of the current file, if any. After this call
     * {@link #hasNext()} returns false.
     */
    @Override
    public void close() throws IOException {
        if(currentReader != null){
            currentReader.close();
            currentReader = null;
        }
        fileIterator = null;
    }
}

View File

@ -0,0 +1,25 @@
package eu.dnetlib.dhp.common.java.io;
import java.io.Closeable;
import java.util.Iterator;
/**
* An iterator for I/O operations that can be {@code close}d explicitly to
* release the resources it holds.
*
* You need to call {@code close} only when the iteration is interrupted in
* the middle, since in such a situation the iterator has no way of knowing
* whether you are going to continue the iteration (so it should still hold
* the resources) or not. There is no need to call {@code close} after
* iterating over all elements, since in such a situation it is called
* automatically at the end of the iteration.
*
* @author mhorst
*
* @param <E> type of the elements returned by this iterator
*/
public interface CloseableIterator<E> extends Iterator<E>, Closeable {
}

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.common.java.io;
import java.util.Iterator;
/**
* Iterator that additionally provides the total number of results.
* @author mhorst
*
* @param <E> type of the elements returned by this iterator
*/
public interface CountingIterator<E> extends Iterator<E> {
/**
* Provides the total number of results to be iterated over.
* @return total number of results to be iterated over
*/
int getCount();
}

View File

@ -0,0 +1,172 @@
package eu.dnetlib.dhp.common.java.io;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumWriter;
/**
 * Utility for accessing Avro-based data stores stored in a file system
 * @author Mateusz Kobos
 *
 */
public final class DataStore {

    private final static String singleDataStoreFileName = "content.avro";

    /** Minimal number of characters of the numeric part of a generated file name. */
    private static final int FILE_NO_PADDING_LENGTH = 7;

    private DataStore(){}

    /**
     * Create a new data store directory with single file and return writer that allows
     * adding new records
     * @param path path to a directory to be created
     * @param schema schema of the records to be stored in the file
     * @return writer of the created file; the caller is responsible for closing it
     * @throws IOException when the directory or the file cannot be created
     */
    public static <T> DataFileWriter<T> create(
            FileSystemPath path, Schema schema) throws IOException{
        return create(path, schema, singleDataStoreFileName);
    }

    /**
     * Create a new data store directory and return writer that allows
     * adding new records
     * @param path path to a directory to be created
     * @param schema schema of the records to be stored in the file
     * @param dataStoreFileName datastore file name
     * @return writer of the created file; the caller is responsible for closing it
     * @throws IOException when the directory or the file cannot be created
     */
    public static <T> DataFileWriter<T> create(
            FileSystemPath path, Schema schema, String dataStoreFileName) throws IOException{
        path.getFileSystem().mkdirs(path.getPath());
        FileSystemPath outFile = new FileSystemPath(
                path, dataStoreFileName);
        return DataStore.createSingleFile(outFile, schema);
    }

    /**
     * Get reader for reading records from given data store
     *
     * Here the schema used for reading the data store is set to be the same
     * as the one that was used to write it.
     *
     * @see #getReader(FileSystemPath, Schema)
     */
    public static <T> CloseableIterator<T> getReader(FileSystemPath path)
            throws IOException{
        return getReader(path, null);
    }

    /**
     * Get reader for reading records from given data store
     * @param path path to a directory corresponding to data store
     * @param readerSchema the schema onto which the read data store will
     *      be projected
     */
    public static <T> CloseableIterator<T> getReader(
            FileSystemPath path, Schema readerSchema) throws IOException{
        return new AvroDataStoreReader<T>(path, readerSchema);
    }

    /**
     * Read data store entries and insert them into a list. A utility function.
     *
     * Here the schema used for reading the data store is set to be the same
     * as the one that was used to write it.
     */
    public static <T> List<T> read(FileSystemPath path)
            throws IOException{
        return read(path, null);
    }

    /**
     * Read data store entries and insert them into a list. A utility function.
     * The underlying reader is closed also when reading fails in the middle.
     *
     * @param readerSchema the schema onto which the read data store will
     *      be projected
     */
    public static <T> List<T> read(FileSystemPath path, Schema readerSchema)
            throws IOException{
        try (CloseableIterator<T> iterator = getReader(path, readerSchema)) {
            List<T> elems = new ArrayList<T>();
            while(iterator.hasNext()){
                elems.add(iterator.next());
            }
            return elems;
        }
    }

    /**
     * Create a data store from a list of entries. A utility function.
     * The schema is implicitly
     * taken from the first element from the {@code elements} list.
     * @param elements list of elements to write. At least one element has
     *      to be present, because it is used to retrieve schema of the
     *      structures passed in the list.
     * @throws IllegalArgumentException when the list of elements is empty
     */
    public static <T extends GenericContainer> void create(
            List<T> elements, FileSystemPath path) throws IOException{
        if(elements.isEmpty()){
            throw new IllegalArgumentException(
                    "The list of elements has to be non-empty");
        }
        Schema schema = elements.get(0).getSchema();
        create(elements, path, schema);
    }

    /**
     * Create a data store from a list of entries with schema given explicitly.
     * A utility function.
     */
    public static <T extends GenericContainer> void create(
            List<T> elements, FileSystemPath path, Schema schema)
            throws IOException{
        try (DataFileWriter<T> writer = create(path, schema)) {
            for(T element: elements){
                writer.append(element);
            }
        }
    }

    /**
     * Create a single Avro file. This method shouldn't be normally used to
     * create data stores since it creates only a single Avro file,
     * while a data store consists of a directory containing one or more files.
     */
    public static <T> DataFileWriter<T> createSingleFile(
            FileSystemPath path, Schema schema) throws IOException{
        DatumWriter<T> datumWriter = new SpecificDatumWriter<T>();
        DataFileWriter<T> writer = new DataFileWriter<T>(datumWriter);
        writer.create(schema, path.getFileSystem().create(path.getPath()));
        return writer;
    }

    /**
     * Generates filename for given file number: the number left-padded with
     * zeros to at least 7 characters, followed by the {@code .avro} extension.
     * @param fileNo file sequence number
     */
    public static String generateFileName(int fileNo) {
        StringBuilder name = new StringBuilder(String.valueOf(fileNo));
        while(name.length()<FILE_NO_PADDING_LENGTH) {
            name.insert(0, '0');
        }
        name.append(".avro");
        return name.toString();
    }
}

View File

@ -0,0 +1,61 @@
package eu.dnetlib.dhp.common.java.io;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * A path to a directory or a file, kept together with the file system
 * in which the path is defined.
 *
 * @author Mateusz Kobos
 *
 */
public class FileSystemPath {

    private final FileSystem fs;
    private final Path path;

    /**
     * Path in the local file system
     */
    public FileSystemPath(File file) throws IOException {
        this(new Path(file.toURI()));
    }

    /**
     * Path in the local file system
     */
    public FileSystemPath(Path path) throws IOException{
        // the file system is resolved from a configuration created with Configuration(false)
        this(FileSystem.get(new Configuration(false)), path);
    }

    /**
     * Path in the given file system
     */
    public FileSystemPath(FileSystem fs, Path path){
        this.fs = fs;
        this.path = path;
    }

    /** Create a path with a child element */
    public FileSystemPath(FileSystemPath parent, String child){
        this(parent.getFileSystem(), new Path(parent.getPath(), child));
    }

    /** @return file system in which the path is defined */
    public FileSystem getFileSystem() {
        return this.fs;
    }

    /** @return the path itself */
    public Path getPath() {
        return this.path;
    }

    /** Opens the path in its file system and returns the resulting stream. */
    public FSDataInputStream getInputStream() throws IOException{
        FileSystem fileSystem = getFileSystem();
        return fileSystem.open(getPath());
    }
}

View File

@ -0,0 +1,37 @@
package eu.dnetlib.dhp.common.java.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Static helpers operating on the file system resolved from a hadoop
 * {@link Configuration} (hdfs or local).
 *
 * @author madryk
 */
public final class HdfsUtils {

    //------------------------ CONSTRUCTORS -------------------

    private HdfsUtils() {}

    //------------------------ LOGIC --------------------------

    /**
     * Deletes the file or the directory (with all its content) located under
     * the given pathname. Does nothing when the path does not exist.
     *
     * @param hadoopConf configuration used to obtain the file system
     * @param pathname location to be removed
     * @throws IOException when the file system cannot be obtained or accessed
     */
    public static void remove(Configuration hadoopConf, String pathname) throws IOException {
        Path target = new Path(pathname);
        FileSystem fs = FileSystem.get(hadoopConf);
        if (!fs.exists(target)) {
            return;
        }
        // second argument requests recursive removal
        fs.delete(target, true);
    }
}

View File

@ -0,0 +1,159 @@
package eu.dnetlib.dhp.common.java.io;
import java.io.IOException;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
/**
 * Iterator that extracts sequence file's consecutive {@link Text} values.
 * When created for a directory it walks through all the files located directly
 * in that directory (names starting with underscore are skipped), otherwise
 * it reads the single file given.
 *
 * @author mhorst
 */
public class SequenceFileTextValueReader implements CloseableIterator<Text> {

    /**
     * Ignore file starting with underscore. Such files are also ignored by
     * default by map-reduce jobs.
     */
    private static final Pattern WHITELIST_REGEXP = Pattern.compile("^[^_].*");

    /** Reader of the file currently being processed, null when there is nothing more to read. */
    private SequenceFile.Reader sequenceReader;

    /** Iterator over directory content, null when reading a single file. */
    private final RemoteIterator<LocatedFileStatus> fileIt;

    private final FileSystem fs;

    /** Value prefetched by {@link #hasNext()} and not yet handed out by {@link #next()}. */
    private Text toBeReturned;

    //------------------------ CONSTRUCTORS --------------------------

    /**
     * Default constructor.
     *
     * @param path HDFS path along with associated FileSystem
     * @throws IOException when the path cannot be listed or the first file cannot be opened
     */
    public SequenceFileTextValueReader(final FileSystemPath path) throws IOException {
        this.fs = path.getFileSystem();
        if (fs.isDirectory(path.getPath())) {
            // non-recursive listing: only files placed directly in the directory
            fileIt = fs.listFiles(path.getPath(), false);
            sequenceReader = getNextSequenceReader();
        } else {
            fileIt = null;
            sequenceReader = new Reader(fs.getConf(), SequenceFile.Reader.file(path.getPath()));
        }
    }

    //------------------------ LOGIC ---------------------------------

    /**
     * Prefetches the next value when none is pending.
     *
     * @return true when there is a value to be returned by {@link #next()}
     */
    @Override
    public boolean hasNext() {
        // check and provide next when already returned
        if (toBeReturned == null) {
            toBeReturned = getNext();
        }
        return toBeReturned != null;
    }

    /**
     * Returns the value prefetched by {@link #hasNext()} or reads a new one.
     *
     * @throws NoSuchElementException when all the values were already returned
     */
    @Override
    public Text next() {
        Text result = (toBeReturned != null) ? toBeReturned : getNext();
        toBeReturned = null;
        if (result == null) {
            throw new NoSuchElementException();
        }
        return result;
    }

    /**
     * Closes the reader of the currently processed file, if any.
     */
    @Override
    public void close() throws IOException {
        if (sequenceReader != null) {
            sequenceReader.close();
        }
    }

    //------------------------ PRIVATE -------------------------------

    /**
     * Opens the next valid file from the directory listing.
     *
     * @return reader of the next file or null when the listing is exhausted
     *         (or when a single file is being read)
     */
    private Reader getNextSequenceReader() throws IOException {
        while (fileIt != null && fileIt.hasNext()) {
            LocatedFileStatus currentFileStatus = fileIt.next();
            if (isValidFile(currentFileStatus)) {
                return new Reader(this.fs.getConf(), SequenceFile.Reader.file(currentFileStatus.getPath()));
            }
        }
        // fallback
        return null;
    }

    /**
     * Checks whether file is valid candidate.
     *
     * @param fileStatus
     *            file status holding file name
     * @return true when valid, false otherwise
     */
    private boolean isValidFile(LocatedFileStatus fileStatus) {
        return fileStatus.isFile()
                && WHITELIST_REGEXP.matcher(fileStatus.getPath().getName()).matches();
    }

    /**
     * Reads the next value, moving on to subsequent files when the current one
     * is exhausted.
     *
     * @return next data package or null when there is no more data
     */
    private Text getNext() {
        try {
            // Loop instead of recursing once per exhausted file: a long run of
            // empty files must not be able to exhaust the call stack.
            while (sequenceReader != null) {
                // fresh instances for every record, the value is handed over to the caller
                Writable key = (Writable) ReflectionUtils.newInstance(sequenceReader.getKeyClass(), fs.getConf());
                Writable value = (Writable) ReflectionUtils.newInstance(sequenceReader.getValueClass(), fs.getConf());
                if (sequenceReader.next(key, value)) {
                    return (Text) value;
                }
                sequenceReader.close();
                sequenceReader = getNextSequenceReader();
            }
            // fallback
            return null;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.dhp.common.java.jsonworkflownodes;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import com.google.common.base.Preconditions;
import eu.dnetlib.dhp.common.java.PortBindings;
import eu.dnetlib.dhp.common.java.Process;
import eu.dnetlib.dhp.common.java.porttype.PortType;
/**
 * Utility class responsible for copying resources available on classpath to specified HDFS location.
 * Expects two parameters: {@code inputClasspathResource} naming the resource to be read with
 * the context class loader and {@code outputHdfsFileLocation} pointing to the file to be written.
 *
 * @author mhorst
 */
public class ClassPathResourceToHdfsCopier implements Process {

    private static final String PARAM_INPUT_CLASSPATH_RESOURCE = "inputClasspathResource";

    private static final String PARAM_OUTPUT_HDFS_FILE_LOCATION = "outputHdfsFileLocation";

    /**
     * Copies the classpath resource into the output file.
     *
     * @throws NullPointerException when any of the parameters is missing or
     *         the resource cannot be found on classpath
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {
        String resourceName = parameters.get(PARAM_INPUT_CLASSPATH_RESOURCE);
        String outputLocation = parameters.get(PARAM_OUTPUT_HDFS_FILE_LOCATION);
        Preconditions.checkNotNull(resourceName, PARAM_INPUT_CLASSPATH_RESOURCE + " parameter was not specified!");
        Preconditions.checkNotNull(outputLocation, PARAM_OUTPUT_HDFS_FILE_LOCATION + " parameter was not specified!");

        FileSystem fs = FileSystem.get(conf);

        try (InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(resourceName)) {
            // getResourceAsStream() signals a missing resource with null: fail with a clear
            // message BEFORE the output file is created, so no empty file is left behind
            Preconditions.checkNotNull(in, "classpath resource not found: %s", resourceName);
            try (OutputStream os = fs.create(new Path(outputLocation))) {
                // streams are closed by try-with-resources, hence close=false
                IOUtils.copyBytes(in, os, 4096, false);
            }
        }
    }

    /** @return empty map, this process reads no input ports */
    @Override
    public Map<String, PortType> getInputPorts() {
        return new HashMap<String, PortType>();
    }

    /** @return empty map, this process writes to no output ports */
    @Override
    public Map<String, PortType> getOutputPorts() {
        return new HashMap<String, PortType>();
    }
}

View File

@ -0,0 +1,66 @@
package eu.dnetlib.dhp.common.java.jsonworkflownodes;
import java.util.HashMap;
import java.util.Map;
import org.apache.avro.Schema;
import eu.dnetlib.dhp.common.java.jsonworkflownodes.StringPortSpecificationExtractor.PortSpecification;
import eu.dnetlib.dhp.common.java.porttype.AvroPortType;
import eu.dnetlib.dhp.common.java.porttype.PortType;
import eu.dnetlib.dhp.common.utils.AvroUtils;
/**
 * Set of port specifications parsed from their textual form. Each specification
 * consists of a port name and two properties: the first one is turned into
 * an Avro {@link Schema} with {@link AvroUtils#toSchema}, the second one is kept
 * as a path of a JSON file.
 *
 * @author Mateusz Kobos
 */
public class PortSpecifications {

    /** Patterns of the two properties following the port name. */
    private static final String[] propertyRegexps =
            new String[]{"[\\w\\.]+", "[\\w\\./_\\-]+"};

    /** Specification values indexed by port name. */
    private final Map<String, SpecificationValues> specs;

    /**
     * Values associated with a single port.
     */
    public static class SpecificationValues {

        private final Schema schema;

        private final String jsonFilePath;

        public SpecificationValues(Schema schema, String jsonFilePath) {
            this.schema = schema;
            this.jsonFilePath = jsonFilePath;
        }

        public Schema getSchema() {
            return schema;
        }

        public String getJsonFilePath() {
            return jsonFilePath;
        }
    }

    /**
     * @param portSpecifications textual port specifications, each one is parsed
     * with {@link StringPortSpecificationExtractor}
     */
    public PortSpecifications(String[] portSpecifications) {
        StringPortSpecificationExtractor extractor =
                new StringPortSpecificationExtractor(propertyRegexps);
        specs = new HashMap<String, SpecificationValues>();
        for (String rawSpecification : portSpecifications) {
            PortSpecification parsed = extractor.getSpecification(rawSpecification);
            String[] properties = parsed.getProperties();
            Schema schema = AvroUtils.toSchema(properties[0]);
            String jsonPath = properties[1];
            specs.put(parsed.getName(), new SpecificationValues(schema, jsonPath));
        }
    }

    /**
     * @return values specified for the given port or null when the port is unknown
     */
    public SpecificationValues get(String portName) {
        return specs.get(portName);
    }

    /**
     * @return newly created map of Avro port types indexed by port name
     */
    public Map<String, PortType> getPortTypes() {
        Map<String, PortType> portTypes = new HashMap<String, PortType>();
        for (Map.Entry<String, SpecificationValues> spec : specs.entrySet()) {
            portTypes.put(spec.getKey(), new AvroPortType(spec.getValue().schema));
        }
        return portTypes;
    }
}

View File

@ -0,0 +1,89 @@
package eu.dnetlib.dhp.common.java.jsonworkflownodes;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Extracts information about port name and its properties from a string
 * of a form "{port_name, property_1, property_2, ...}". Whitespace is accepted
 * around the braces and after each comma.
 *
 * @author Mateusz Kobos
 */
public class StringPortSpecificationExtractor {

    private final String[] propertiesRegexp;

    private final String portSpecificationRegexp;

    private final Pattern pattern;

    /**
     * Port name along with the values of its properties.
     */
    public static class PortSpecification {

        private final String name;

        private final String[] properties;

        public PortSpecification(String name, String[] properties) {
            this.name = name;
            this.properties = properties;
        }

        public String getName() {
            return name;
        }

        public String[] getProperties() {
            return properties;
        }
    }

    /**
     * @param propertiesRegexp regular expressions specifying pattern for
     * each of the properties associated with a port. An example of a single
     * specification: {@code "[\\w\\.]+"}. The expressions must not introduce
     * capturing groups of their own, otherwise {@link #getSpecification(String)}
     * rejects every input.
     */
    public StringPortSpecificationExtractor(String[] propertiesRegexp) {
        this.propertiesRegexp = propertiesRegexp;
        this.portSpecificationRegexp = createRegexpString("[\\w\\._]+", propertiesRegexp);
        this.pattern = Pattern.compile(this.portSpecificationRegexp);
    }

    /**
     * Builds the regular expression of the whole specification: port name and
     * every property are wrapped in a capturing group of their own.
     */
    private static String createRegexpString(String portNameRegexp, String[] propertiesRegexp) {
        StringBuilder regexp = new StringBuilder();
        // leading whitespace is optional; this has to be "\\s*", a plain "s*"
        // would match literal 's' characters instead of whitespace
        regexp.append("\\s*\\{\\s*");
        regexp.append("(").append(portNameRegexp).append(")");
        for (String propertyRegexp : propertiesRegexp) {
            regexp.append(",\\s*(").append(propertyRegexp).append(")");
        }
        regexp.append("\\s*\\}\\s*");
        return regexp.toString();
    }

    private int getPropertiesCount() {
        return propertiesRegexp.length;
    }

    /**
     * Parses single port specification.
     *
     * @param text specification of a form "{port_name, property_1, property_2, ...}"
     * @return port name along with its properties
     * @throws RuntimeException when the text does not match the expected pattern or
     * the pattern yields unexpected number of capturing groups
     */
    public PortSpecification getSpecification(String text) {
        Matcher m = pattern.matcher(text);
        if (!m.matches()) {
            throw new RuntimeException(String.format("Specification of " +
                    "the port (\"%s\") does not match regexp \"%s\"",
                    text, portSpecificationRegexp));
        }
        final int expectedGroupsCount = getPropertiesCount() + 1;
        if (m.groupCount() != expectedGroupsCount) {
            // capturing groups are numbered 1..groupCount(), group 0 is the whole match
            StringBuilder groups = new StringBuilder();
            for (int i = 1; i <= m.groupCount(); i++) {
                groups.append("\"" + m.group(i) + "\"");
                if (i != m.groupCount()) {
                    groups.append(", ");
                }
            }
            throw new RuntimeException(String.format(
                    "Invalid output port specification \"%s\": got %d groups " +
                    "instead of %d (namely: %s)", text, m.groupCount(),
                    expectedGroupsCount, groups.toString()));
        }
        String[] properties = new String[getPropertiesCount()];
        for (int i = 0; i < getPropertiesCount(); i++) {
            // group 1 holds the port name, properties start at group 2
            properties[i] = m.group(i + 2);
        }
        return new PortSpecification(m.group(1), properties);
    }
}

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.common.java.porttype;
/**
 * Port type without any restrictions: it accepts every other port type.
 *
 * @author Mateusz Kobos
 */
public class AnyPortType implements PortType {

    private static final String NAME = "Any";

    /** @return constant name of this port type */
    @Override
    public String getName() {
        return NAME;
    }

    /** @return always {@code true}, regardless of the other port type */
    @Override
    public boolean accepts(PortType other) {
        return true;
    }
}

View File

@ -0,0 +1,65 @@
package eu.dnetlib.dhp.common.java.porttype;
import org.apache.avro.Schema;
import org.apache.commons.lang.NotImplementedException;
/**
 * This port type accepts data stores in a format of Avro
 * Object Container Files, i.e. Avro data files.
 * This kind of file corresponds to a list of objects, each one being of the
 * same type, i.e. each one is defined by the same Avro schema.
 *
 * @author Mateusz Kobos
 */
public class AvroPortType implements PortType {

    private final Schema schema;

    public AvroPortType(Schema schema) {
        this.schema = schema;
    }

    /** @return full name of the underlying schema */
    @Override
    public String getName() {
        return schema.getFullName();
    }

    /**
     * Simple check if the port types are exactly the same
     * (as defined by the {@code equals} method).
     *
     * TODO: this should work in a more relaxed way -
     * {@code this.accepts(other)} should be true if {@code this}
     * describes a subset of structures defined in {@code other}. To be
     * more precise: the JSON schema tree defined by {@code this} should
     * form a sub-tree of the JSON schema tree defined by {@code other}.
     */
    @Override
    public boolean accepts(PortType other) {
        return this.equals(other);
    }

    /**
     * Two port types are equal if their schemas are the same.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof AvroPortType)) {
            return false;
        }
        AvroPortType other = (AvroPortType) o;
        return this.schema.equals(other.schema);
    }

    /**
     * Hash code derived from the schema, the only field compared in
     * {@link #equals(Object)}, so that equal port types share the same hash code
     * and instances can be safely stored in hash based collections.
     */
    @Override
    public int hashCode() {
        return schema.hashCode();
    }

    /**
     * Returns avro schema.
     * @return avro schema
     */
    public Schema getSchema() {
        return schema;
    }
}

View File

@ -0,0 +1,33 @@
package eu.dnetlib.dhp.common.java.porttype;
/**
* Type of the port. This is used to specify what kind of data is
* accepted on a certain input port or produced on a certain output port
* of a workflow node.
*
* @author Mateusz Kobos
*
*/
public interface PortType {
/**
* Name of this port type.
*
* @return port type name
*/
String getName();
/**
* This should be used to check whether data produced by a workflow node
* conforms to the data consumed by other workflow node.
* In a scenario when A produces certain data on a port p and B consumes
* this data on a port q, type(q).accepts(type(p)) has to be true.
*
* @param other port type to be checked against this one; in the scenario
* above it is the type of the producing port p
* @return {@code true} if {@code this} port type is a more general
* version of the {@code other} port type,
* or as an alternative explanation: {@code other} is a subset of
* {@code this}, i.e. {@code other} has at least all the properties present
* in {@code this} (and possibly some others). This is analogous to a
* situation in object-oriented programming, where in order for assignment
* operation {@code this = other} to work, the type of {@code this} has to
* accept type of {@code other}, or in other words {@code other} has to
* inherit from {@code this}, or in yet other words: {@code other} has to
* conform to {@code this}.
*/
boolean accepts(PortType other);
}

View File

@ -0,0 +1,149 @@
package eu.dnetlib.dhp.common.lock;
import java.security.InvalidParameterException;
import java.util.Collections;
import java.util.Map;
import java.util.concurrent.Semaphore;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.ZKFailoverController;
import org.apache.log4j.Logger;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Watcher.Event;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import eu.dnetlib.dhp.common.java.PortBindings;
import eu.dnetlib.dhp.common.java.porttype.PortType;
/**
 * Zookeeper lock managing process. Blocks until lock is released.
 * <p>
 * The lock is represented by a persistent node created under {@link #DEFAULT_ROOT_NODE}.
 * In the {@code obtain} mode the process creates the node or, when the node already
 * exists, waits until the node is deleted and creates it again. In the {@code release}
 * mode the node is deleted.
 * </p>
 *
 * @author mhorst
 *
 */
public class LockManagingProcess implements eu.dnetlib.dhp.common.java.Process {

    public static final String DEFAULT_ROOT_NODE = "/cache";

    public static final String NODE_SEPARATOR = "/";

    public static final String PARAM_ZK_SESSION_TIMEOUT = "zk_session_timeout";

    public static final String PARAM_NODE_ID = "node_id";

    public static final String PARAM_LOCK_MODE = "mode";

    public static enum LockMode {
        obtain,
        release
    }

    public static final int DEFAULT_SESSION_TIMEOUT = 60000;

    public static final Logger log = Logger.getLogger(LockManagingProcess.class);

    @Override
    public Map<String, PortType> getInputPorts() {
        return Collections.emptyMap();
    }

    @Override
    public Map<String, PortType> getOutputPorts() {
        return Collections.emptyMap();
    }

    /**
     * Obtains or releases the lock identified by the {@code node_id} parameter,
     * depending on the {@code mode} parameter. The zookeeper quorum is read from
     * the hadoop configuration, session timeout can be provided with the optional
     * {@code zk_session_timeout} parameter.
     *
     * @throws IllegalArgumentException when a required parameter or the quorum is missing
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf,
            Map<String, String> parameters) throws Exception {
        Preconditions.checkArgument(parameters.containsKey(PARAM_NODE_ID), "node id not provided!");
        Preconditions.checkArgument(parameters.containsKey(PARAM_LOCK_MODE), "lock mode not provided!");
        String zkConnectionString = conf.get(ZKFailoverController.ZK_QUORUM_KEY);
        Preconditions.checkArgument(StringUtils.isNotBlank(zkConnectionString),
                "zookeeper quorum is unknown, invalid '%s' property value: %s", ZKFailoverController.ZK_QUORUM_KEY, zkConnectionString);
        int sessionTimeout = parameters.containsKey(PARAM_ZK_SESSION_TIMEOUT)?
                Integer.valueOf(parameters.get(PARAM_ZK_SESSION_TIMEOUT)) : DEFAULT_SESSION_TIMEOUT;

        final ZooKeeper zooKeeper = new ZooKeeper(zkConnectionString, sessionTimeout, (e) -> {
            // we are not interested in generic events
        });
        try {
            // initializing root node if does not exist
            if (zooKeeper.exists(DEFAULT_ROOT_NODE, false) == null) {
                log.info("initializing root node: " + DEFAULT_ROOT_NODE);
                zooKeeper.create(DEFAULT_ROOT_NODE, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                log.info("root node initialized");
            }

            final String nodePath = generatePath(parameters.get(PARAM_NODE_ID), DEFAULT_ROOT_NODE);

            // the only permit is taken up-front so obtain() can block on the semaphore
            // until the watcher notices the lock node removal
            final Semaphore semaphore = new Semaphore(1);
            semaphore.acquire();

            switch(LockMode.valueOf(parameters.get(PARAM_LOCK_MODE))) {
                case obtain: {
                    obtain(zooKeeper, nodePath, semaphore);
                    break;
                }
                case release: {
                    release(zooKeeper, nodePath);
                    break;
                }
                default: {
                    throw new InvalidParameterException("unsupported lock mode: " + parameters.get(PARAM_LOCK_MODE));
                }
            }
        } finally {
            // lock nodes are PERSISTENT so they outlive the session; closing the client
            // releases the connection and its threads instead of leaking them
            zooKeeper.close();
        }
    }

    // ------------------------- PRIVATE --------------------------

    /**
     * Creates the lock node, waiting for its removal first when the node already exists.
     *
     * NOTE(review): exists() followed by create() is not atomic and a failure inside the
     * watcher callback never releases the semaphore, so with several processes competing
     * for the same lock one of them may fail or wait forever - to be confirmed and
     * addressed separately.
     */
    private void obtain(final ZooKeeper zooKeeper, final String nodePath, final Semaphore semaphore) throws KeeperException, InterruptedException {
        log.info("trying to obtain lock: " + nodePath);
        if (zooKeeper.exists(nodePath, (event) -> {
            // watcher callback: only node removal is of interest
            if (Event.EventType.NodeDeleted == event.getType()) {
                try {
                    log.info(nodePath + " lock release detected");
                    log.info("creating new lock instance: " + nodePath + "...");
                    zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
                    log.info("lock" + nodePath + " created");
                    semaphore.release();
                } catch (KeeperException | InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }) == null) {
            log.info("lock not found, creating new lock instance: " + nodePath);
            zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
            log.info("lock" + nodePath + " created");
            semaphore.release();
        } else {
            // waiting until node is removed by other lock manager
            log.info("waiting until lock is released");
            Stopwatch timer = new Stopwatch().start();
            semaphore.acquire();
            log.info("lock released, waited for " + timer.elapsedMillis() + " ms");
            semaphore.release();
        }
    }

    /**
     * Deletes the lock node regardless of its version.
     */
    private void release(final ZooKeeper zooKeeper, final String nodePath) throws InterruptedException, KeeperException {
        log.info("removing lock" + nodePath + "...");
        // version -1 matches any version of the node
        zooKeeper.delete(nodePath, -1);
        log.info("lock" + nodePath + " removed");
    }

    /**
     * Builds lock node path: slashes in node identifier are replaced with underscores
     * so the lock is always a direct child of the root node.
     */
    private static final String generatePath(String nodeId, String rootNode) {
        return rootNode + NODE_SEPARATOR + nodeId.replace('/', '_');
    }
}

View File

@ -0,0 +1,24 @@
package eu.dnetlib.dhp.common.oozie;
import org.apache.oozie.client.OozieClient;
/**
 * Creates {@link OozieClient} instances.
 *
 * @author madryk
 */
public class OozieClientFactory {

    //------------------------ LOGIC --------------------------

    /**
     * Builds a new {@link OozieClient} for the given oozie url.
     *
     * @param oozieUrl url passed to the {@link OozieClient} constructor
     * @return newly created client
     */
    public OozieClient createOozieClient(String oozieUrl) {
        return new OozieClient(oozieUrl);
    }
}

View File

@ -0,0 +1,76 @@
package eu.dnetlib.dhp.common.oozie.property;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import eu.dnetlib.dhp.common.java.PortBindings;
import eu.dnetlib.dhp.common.java.Process;
import eu.dnetlib.dhp.common.java.porttype.PortType;
import org.apache.hadoop.conf.Configuration;
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME;
/**
 * This process is a solution for setting dynamic properties in oozie workflow definition.
 *
 * Expects three parameters to be provided: the first 'condition' parameter is boolean value
 * based on which either first 'inCaseOfTrue' or second 'elseCase' parameter value is set as
 * the 'result' property.
 *
 * This can be understood as the:
 *
 * condition ? inCaseOfTrue : elseCase
 *
 * java syntax equivalent.
 *
 * The 'result' property is stored in the file pointed by the
 * {@code OOZIE_ACTION_OUTPUT_FILENAME} system property.
 *
 * @author mhorst
 *
 */
public class ConditionalPropertySetter implements Process {

    public static final String PARAM_CONDITION = "condition";

    public static final String PARAM_INCASEOFTRUE = "inCaseOfTrue";

    public static final String PARAM_ELSECASE = "elseCase";

    public static final String OUTPUT_PROPERTY_RESULT = "result";

    @Override
    public Map<String, PortType> getInputPorts() {
        return Collections.emptyMap();
    }

    @Override
    public Map<String, PortType> getOutputPorts() {
        return Collections.emptyMap();
    }

    /**
     * Evaluates the condition and writes the selected value as the 'result' property.
     *
     * @throws RuntimeException when the condition or the value selected by
     * the condition was not provided
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf,
            Map<String, String> parameters) throws Exception {

        String condition = parameters.get(PARAM_CONDITION);
        if (condition == null) {
            throw new RuntimeException("unable to make decision: " +
                    PARAM_CONDITION + " parameter was not set!");
        }

        // anything but "true" (case insensitive) selects the else case
        String selectedParam = Boolean.parseBoolean(condition) ? PARAM_INCASEOFTRUE : PARAM_ELSECASE;
        String result = parameters.get(selectedParam);
        if (result == null) {
            // Properties does not accept null values: fail with a clear message
            // instead of a bare NullPointerException
            throw new RuntimeException("unable to set " + OUTPUT_PROPERTY_RESULT + " property: " +
                    selectedParam + " parameter was not set!");
        }

        Properties props = new Properties();
        props.setProperty(OUTPUT_PROPERTY_RESULT, result);

        try (OutputStream os = new FileOutputStream(
                new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)))) {
            props.store(os, "");
        }
    }
}

View File

@ -0,0 +1,12 @@
package eu.dnetlib.dhp.common.protobuf;
import com.google.protobuf.Message;
import org.apache.avro.generic.IndexedRecord;
/**
* Converts an Avro record into a textual key and a protocol buffers message.
* In AvroToProtoBufOneToOneMapper the key becomes the output key of the mapper
* and the message, serialized to a byte array, becomes the output value.
*
* @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl)
*/
public interface AvroToProtoBufConverter<IN extends IndexedRecord, OUT extends Message> {
/**
* @param datum Avro record to be converted
* @return key derived from the record
*/
String convertIntoKey(IN datum);
/**
* @param datum Avro record to be converted
* @return protocol buffers message built from the record
*/
OUT convertIntoValue(IN datum);
}

View File

@ -0,0 +1,62 @@
package eu.dnetlib.dhp.common.protobuf;
import com.google.protobuf.Message;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import java.io.IOException;
/**
 * Mapper turning every Avro input record into exactly one key/value pair: a {@link Text}
 * key and a {@link BytesWritable} holding serialized protocol buffers message. Conversion
 * is delegated to the {@link AvroToProtoBufConverter} implementation whose class is
 * given in the {@code converter_class} configuration property.
 *
 * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl)
 */
public class AvroToProtoBufOneToOneMapper<IN extends IndexedRecord, OUT extends Message>
        extends Mapper<AvroKey<IN>, NullWritable, Text, BytesWritable> {

    private static final String CONVERTER_CLASS_PROPERTY = "converter_class";

    private static final Logger log = Logger.getLogger(AvroToProtoBufOneToOneMapper.class);

    /** Output objects reused between map() calls. */
    private final Text keyWritable = new Text();

    private final BytesWritable valueWritable = new BytesWritable();

    private AvroToProtoBufConverter<IN, OUT> converter;

    /**
     * Instantiates the converter specified in the job configuration.
     *
     * @throws IOException when the converter class is not specified, cannot be
     * instantiated or is not an {@link AvroToProtoBufConverter}
     */
    @SuppressWarnings("unchecked")
    @Override
    public void setup(Context context) throws IOException, InterruptedException {
        Class<?> converterClass = context.getConfiguration().getClass(CONVERTER_CLASS_PROPERTY, null);
        if (converterClass == null) {
            throw new IOException("Please specify " + CONVERTER_CLASS_PROPERTY);
        }
        try {
            Object instance = converterClass.newInstance();
            converter = (AvroToProtoBufConverter<IN, OUT>) instance;
        } catch (ClassCastException e) {
            throw new IOException(
                    "Class specified in " + CONVERTER_CLASS_PROPERTY + " doesn't implement AvroToProtoBufConverter", e);
        } catch (Exception e) {
            throw new IOException(
                    "Could not instantiate specified AvroToProtoBufConverter class, " + converterClass, e);
        }
    }

    /**
     * Converts the record and writes the resulting pair. Any failure is logged
     * and the record is skipped, the job is not interrupted.
     */
    @Override
    public void map(AvroKey<IN> avro, NullWritable ignore, Context context)
            throws IOException, InterruptedException {
        String key = null;
        try {
            IN datum = avro.datum();
            key = converter.convertIntoKey(datum);
            keyWritable.set(key);
            byte[] serialized = converter.convertIntoValue(datum).toByteArray();
            valueWritable.set(serialized, 0, serialized.length);
            context.write(keyWritable, valueWritable);
        } catch (Exception e) {
            // the key is known only when the failure happened after key conversion
            String processedKeyInfo = (key == null) ? "" : " while processing " + key;
            log.error("Error" + processedKeyInfo, e);
        }
    }
}

View File

@ -0,0 +1,32 @@
package eu.dnetlib.dhp.common.report;
import eu.dnetlib.dhp.common.schemas.ReportEntry;
import eu.dnetlib.dhp.common.schemas.ReportEntryType;
/**
 * Static factory methods building {@link ReportEntry} objects.
 *
 * @author madryk
 */
public final class ReportEntryFactory {

    // ----------------------- CONSTRUCTORS -----------------------------

    private ReportEntryFactory() {}

    // ----------------------- LOGIC ------------------------------------

    /**
     * Builds {@link ReportEntry} of the {@link ReportEntryType#COUNTER} type
     * holding the count in its textual form.
     */
    public static ReportEntry createCounterReportEntry(String key, long count) {
        return newEntry(key, ReportEntryType.COUNTER, count);
    }

    /**
     * Builds {@link ReportEntry} of the {@link ReportEntryType#DURATION} type
     * holding the duration in its textual form.
     */
    public static ReportEntry createDurationReportEntry(String key, long duration) {
        return newEntry(key, ReportEntryType.DURATION, duration);
    }

    // ----------------------- PRIVATE ----------------------------------

    /** Creates an entry of the given type with the number converted to its decimal string form. */
    private static ReportEntry newEntry(String key, ReportEntryType type, long value) {
        return new ReportEntry(key, type, String.valueOf(value));
    }
}

View File

@ -0,0 +1,110 @@
package eu.dnetlib.dhp.common.report;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.common.java.PortBindings;
import eu.dnetlib.dhp.common.java.Process;
import eu.dnetlib.dhp.common.java.io.DataStore;
import eu.dnetlib.dhp.common.java.io.FileSystemPath;
import eu.dnetlib.dhp.common.java.porttype.AvroPortType;
import eu.dnetlib.dhp.common.java.porttype.PortType;
import eu.dnetlib.dhp.common.schemas.ReportEntry;
/**
 * Java workflow node process for building report.<br/>
 * It writes report properties into avro datastore of {@link ReportEntry}s
 * with location specified in output port.<br/>
 * Report property name must start with <code>report.</code> to
 * be included in output datastore. Every report property is stored as a counter
 * entry, therefore its value has to be an integer number parsable as a long.
 *
 * Usage example:<br/>
 * <pre>
 * {@code
 * <action name="report">
 *     <java>
 *         <main-class>eu.dnetlib.dhp.common.java.ProcessWrapper</main-class>
 *         <arg>eu.dnetlib.dhp.common.report.ReportGenerator</arg>
 *         <arg>-Preport.someProperty=42</arg>
 *         <arg>-Oreport=/report/path</arg>
 *     </java>
 *     ...
 * </action>
 * }
 * </pre>
 * Above example will produce avro datastore in <code>/report/path</code>
 * with single {@link ReportEntry}.
 * Where the {@link ReportEntry#getKey()} will be equal to <code>someProperty</code> and
 * the {@link ReportEntry#getValue()} will be equal to <code>42</code>
 * (notice the stripped <code>report.</code> prefix from the entry key).
 *
 *
 * @author madryk
 *
 */
public class ReportGenerator implements Process {

    private static final String REPORT_PORT_OUT_NAME = "report";

    private static final String REPORT_PROPERTY_PREFIX = "report.";

    //------------------------ LOGIC --------------------------

    @Override
    public Map<String, PortType> getInputPorts() {
        return Collections.emptyMap();
    }

    @Override
    public Map<String, PortType> getOutputPorts() {
        return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$));
    }

    /**
     * Collects report properties from the parameters and stores them in the
     * datastore bound to the <code>report</code> output port.
     *
     * @throws NumberFormatException when value of a report property is not a valid long
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {
        Map<String, String> entriesToReport = collectEntriesToReport(parameters);
        List<ReportEntry> avroReport = convertToAvroReport(entriesToReport);

        FileSystem fs = FileSystem.get(conf);
        Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME);
        DataStore.create(avroReport, new FileSystemPath(fs, reportPath));
    }

    //------------------------ PRIVATE --------------------------

    /**
     * Selects parameters with the <code>report.</code> prefix and strips that prefix from their names.
     */
    private Map<String, String> collectEntriesToReport(Map<String, String> parameters) {
        return parameters.entrySet().stream()
                .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX))
                .collect(Collectors.toMap(
                        property -> property.getKey().substring(REPORT_PROPERTY_PREFIX.length()),
                        Map.Entry::getValue));
    }

    /**
     * Converts every entry into a counter {@link ReportEntry}.
     */
    private List<ReportEntry> convertToAvroReport(Map<String, String> entriesToReport) {
        return entriesToReport.entrySet().stream()
                .map(entry -> ReportEntryFactory.createCounterReportEntry(
                        entry.getKey(), Long.parseLong(entry.getValue())))
                .collect(Collectors.toList());
    }
}

View File

@ -0,0 +1,74 @@
package eu.dnetlib.dhp.common.spark.pipe;
import java.io.Serializable;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.hadoop.io.NullWritable;
import org.apache.spark.SparkFiles;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import eu.dnetlib.dhp.common.utils.AvroGsonFactory;
import scala.Tuple2;
/**
 * Executor of mapreduce scripts using spark pipes.
 * It imitates hadoop streaming behavior.
 *
 * @author madryk
 *
 */
public class SparkPipeExecutor implements Serializable {

    private static final long serialVersionUID = 1L;

    //------------------------ LOGIC --------------------------

    /**
     * Imitates map part of hadoop streaming job.
     * It executes provided script for every key in inputRecords rdd.
     * <br/><br/>
     * It is assumed that provided script will read records from standard input (one line for one record)
     * and write mapped record into standard output (also one line for one record).
     * Mapped record can be a key/value pair. In that case script should return key and value
     * separated by tab (\t) character in single line. Only the first tab separates the key from
     * the value, any further tabs belong to the value. When the line contains no tab or nothing
     * follows the first tab, the value is null.
     *
     * @param inputRecords records whose keys are piped to the script
     * @param scriptName name of the python script, resolved with {@link SparkFiles#get(String)}
     * @param args arguments appended to the script command line
     */
    public JavaPairRDD<String, String> doMap(JavaPairRDD<AvroKey<GenericRecord>, NullWritable> inputRecords, String scriptName, String args) {
        JavaRDD<String> mappedRecords = inputRecords.keys().pipe("python " + SparkFiles.get(scriptName) + " " + args);
        return mappedRecords.mapToPair(line -> splitKeyValue(line));
    }

    /**
     * Imitates reduce part of hadoop streaming job.
     * <br/><br/>
     * It is assumed that provided script will read records from standard input (one line for one record)
     * and group records with the same key into single record (reduce).
     * Method assures that all input records with the same key will be transferred in adjacent lines.
     * Reduced records should be written by script into standard output (one line for one record).
     * Reduced records must be json strings of class provided as argument.
     *
     * @param inputRecords key/value pairs to be reduced, null value results in a line holding the key only
     * @param scriptName name of the python script, resolved with {@link SparkFiles#get(String)}
     * @param args arguments appended to the script command line
     * @param outputClass class the json lines produced by the script are parsed into
     */
    public JavaPairRDD<AvroKey<GenericRecord>, NullWritable> doReduce(JavaPairRDD<String, String> inputRecords, String scriptName, String args, Class<? extends GenericRecord> outputClass) {
        // sorting places records sharing the same key next to each other
        JavaRDD<String> reducedRecords = inputRecords.sortByKey()
                .map(record -> record._1 + ((record._2 == null) ? "" : ("\t" + record._2)))
                .pipe("python " + SparkFiles.get(scriptName) + " " + args);

        JavaPairRDD<AvroKey<GenericRecord>, NullWritable> outputRecords = reducedRecords
                .map(recordString -> AvroGsonFactory.create().fromJson(recordString, outputClass))
                .mapToPair(record -> new Tuple2<AvroKey<GenericRecord>, NullWritable>(new AvroKey<>(record), NullWritable.get()));

        return outputRecords;
    }

    //------------------------ PRIVATE --------------------------

    /**
     * Splits a line on its first tab into key and value. Unlike a plain split("\t")
     * it keeps tabs embedded in the value and does not fail on lines made of tabs only.
     */
    private static Tuple2<String, String> splitKeyValue(String line) {
        int firstTab = line.indexOf('\t');
        if (firstTab < 0) {
            return new Tuple2<String, String>(line, null);
        }
        String value = line.substring(firstTab + 1);
        // empty remainder is reported as null, the same way as a missing value
        return new Tuple2<String, String>(line.substring(0, firstTab), value.isEmpty() ? null : value);
    }
}

View File

@ -0,0 +1,31 @@
package eu.dnetlib.dhp.common.string;
/**
 * Static helpers for {@link CharSequence} values.
 *
 * @author Łukasz Dumiszewski
 */
public final class CharSequenceUtils {

    //------------------------ CONSTRUCTORS --------------------------

    /** Not meant to be instantiated. */
    private CharSequenceUtils() {
        throw new IllegalStateException("may not be initialized");
    }

    //------------------------ LOGIC --------------------------

    /**
     * Null-safe {@link CharSequence#toString()}.
     *
     * @param value character sequence to convert, may be null
     * @return string form of <code>value</code> or empty string when <code>value</code> is null
     */
    public static String toStringWithNullToEmpty(CharSequence value) {
        if (value == null) {
            return "";
        }
        return value.toString();
    }
}

View File

@ -0,0 +1,113 @@
/*
* This file is part of CoAnSys project.
* Copyright (c) 2012-2015 ICM-UW
*
* CoAnSys is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* CoAnSys is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with CoAnSys. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.dnetlib.dhp.common.string;
import java.text.Normalizer;
import java.util.HashMap;
import java.util.Map;
/**
* Mapping to the basic Latin alphabet (a-z, A-Z). In most cases, a character is
* mapped to the closest visual form, rather than functional one, e.g.: "ö" is
* mapped to "o" rather than "oe", and "đ" is mapped to "d" rather than "dj" or
* "gj". Notable exceptions include: "ĸ" mapped to "q", "ß" mapped to "ss", and
* "Þ", "þ" mapped to "Y", "y".
*
* <p> Each character is processed as follows: <ol> <li>the character is
* compatibility decomposed,</li> <li>all the combining marks are removed,</li>
* <li>the character is compatibility composed,</li> <li>additional "manual"
* substitutions are applied.</li> </ol> </p>
*
* <p> All the characters from the "Latin-1 Supplement" and "Latin Extended-A"
* Unicode blocks are mapped to the "Basic Latin" block. Characters from other
* alphabets are generally left intact, although the decomposable ones may be
* affected by the procedure. </p>
*
* @author Lukasz Bolikowski (bolo@icm.edu.pl)
*
* @author Łukasz Dumiszewski /just copied from coansys-commons/
*
*/
public final class DiacriticsRemover {

    /**
     * Characters that are not changed by decomposition and therefore need a manual substitution.
     * Written as Unicode escapes so that the table does not depend on the encoding
     * the source file is compiled with.
     */
    private static final char[] FROM = {
        // Latin-1 Supplement: AE, ETH, O with stroke, THORN, sharp s, ae, eth, o with stroke, thorn
        '\u00C6', '\u00D0', '\u00D8', '\u00DE', '\u00DF', '\u00E6', '\u00F0', '\u00F8', '\u00FE',
        // Latin Extended-A: D/d with stroke, H/h with stroke, dotless i, kra
        '\u0110', '\u0111', '\u0126', '\u0127', '\u0131', '\u0138',
        // Latin Extended-A: L/l with stroke, ENG/eng, OE/oe, T/t with stroke
        '\u0141', '\u0142', '\u014A', '\u014B', '\u0152', '\u0153', '\u0166', '\u0167'};

    /** Replacements, position by position, for the characters listed in {@link #FROM}. */
    private static final String[] TO = {
        "AE", "D", "O", "Y", "ss", "ae", "d", "o", "y",
        "D", "d", "H", "h", "i", "q",
        "L", "l", "N", "n", "OE", "oe", "T", "t"};

    /** Substitution table built once from {@link #FROM} and {@link #TO}; never modified afterwards. */
    private static final Map<Character, String> LOOKUP = buildLookup();

    //------------------------ CONSTRUCTORS -------------------

    private DiacriticsRemover() {}

    //------------------------ LOGIC --------------------------

    /**
     * Removes diacritics from a text.
     * <p>
     * The text is compatibility decomposed (NFKD), every character of type
     * {@link Character#NON_SPACING_MARK} is dropped and the characters listed in the manual
     * substitution table are replaced. The result is not recomposed afterwards.
     *
     * @param text Text to process.
     * @return Text without diacritics, or null when <code>text</code> is null.
     */
    public static String removeDiacritics(String text) {
        if (text == null) {
            return null;
        }

        String decomposed = Normalizer.normalize(text, Normalizer.Form.NFKD);
        StringBuilder builder = new StringBuilder(decomposed.length());

        for (int i = 0; i < decomposed.length(); i++) {
            char ch = decomposed.charAt(i);
            if (Character.getType(ch) == Character.NON_SPACING_MARK) {
                // combining mark left over from the decomposition
                continue;
            }
            // single map access; the table never holds null values
            String replacement = LOOKUP.get(ch);
            if (replacement != null) {
                builder.append(replacement);
            } else {
                builder.append(ch);
            }
        }
        return builder.toString();
    }

    //------------------------ PRIVATE --------------------------

    /**
     * Builds the substitution table out of the two parallel arrays.
     *
     * @throws IllegalStateException when the arrays differ in length
     */
    private static Map<Character, String> buildLookup() {
        if (FROM.length != TO.length) {
            throw new IllegalStateException("substitution tables differ in length: "
                    + FROM.length + " vs " + TO.length);
        }
        Map<Character, String> table = new HashMap<Character, String>();
        for (int i = 0; i < FROM.length; i++) {
            table.put(FROM[i], TO[i]);
        }
        return table;
    }
}

View File

@ -0,0 +1,130 @@
/*
* This file is part of CoAnSys project.
* Copyright (c) 2012-2015 ICM-UW
*
* CoAnSys is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* CoAnSys is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with CoAnSys. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.dnetlib.dhp.common.string;
import java.io.Serializable;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.ImmutableList;
/**
* An implementation of {@link StringNormalizer} that normalizes strings for non-strict comparisons
* in which one does not care about characters other than letters and digits or about differently written diacritics.
*
* @author Łukasz Dumiszewski
*
*/
public final class LenientComparisonStringNormalizer implements StringNormalizer, Serializable {
private static final long serialVersionUID = 1L;
private List<Character> whitelistCharacters;
//------------------------ CONSTRUCTORS --------------------------
public LenientComparisonStringNormalizer() {
this(ImmutableList.of());
}
/**
* @param whitelistCharacters - non alphanumeric characters that will not be removed
* during normalization
*/
public LenientComparisonStringNormalizer(List<Character> whitelistCharacters) {
this.whitelistCharacters = whitelistCharacters;
}
//------------------------ LOGIC --------------------------
/**
* Normalizes the given value. <br/>
* The normalized strings are better suited for non-strict comparisons, in which one does NOT care about characters that are
* neither letters nor digits; about accidental spaces or different diacritics etc. <br/><br/>
* This method:
* <ul>
* <li>Replaces all characters that are not letters or digits with spaces (except those on whitelist characters list)</li>
* <li>Replaces white spaces with spaces </li>
* <li>Trims</li>
* <li>Compacts multi-space gaps to one-space gaps</li>
* <li>Removes diacritics</li>
* <li>Changes characters to lower case</li>
* </ul>
* Returns "" if the passed value is null or blank
*
* @param value the string to normalize
* @see DiacriticsRemover#removeDiacritics(String, boolean)
*
*
*/
public String normalize(String value) {
if (StringUtils.isBlank(value)) {
return "";
}
String result = value;
result = DiacriticsRemover.removeDiacritics(result);
result = removeNonLetterDigitCharacters(result);
result = result.toLowerCase();
result = result.trim().replaceAll(" +", " ");
return result;
}
//------------------------ PRIVATE --------------------------
private String removeNonLetterDigitCharacters(final String value) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < value.length(); ++i) {
char c = value.charAt(i);
if (Character.isLetterOrDigit(c) || whitelistCharacters.contains(c)) {
sb.append(c);
} else {
sb.append(' ');
}
}
return sb.toString();
}
}

View File

@ -0,0 +1,16 @@
package eu.dnetlib.dhp.common.string;
/**
* String normalizer.
*
* @author Łukasz Dumiszewski
*
*/
public interface StringNormalizer {
/**
* Normalizes the given string value.
*
* @param value the string to normalize
* @return normalized form of <code>value</code>; what "normalized" means is up to the implementation
*/
String normalize(String value);
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.common.utils;
import java.lang.reflect.Type;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonParseException;
/**
 * Factory of {@link Gson} objects able to deserialize json into avro generated classes.
 * The produced instances know how to read {@link CharSequence} typed fields.
 *
 * @author madryk
 *
 */
public final class AvroGsonFactory {

    //------------------------ CONSTRUCTORS -------------------

    private AvroGsonFactory() {}

    //------------------------ LOGIC --------------------------

    /**
     * Creates a new {@link Gson} instance with {@link CharSequenceDeserializer}
     * registered for the {@link CharSequence} type.
     */
    public static Gson create() {
        return new GsonBuilder()
                .registerTypeAdapter(CharSequence.class, new CharSequenceDeserializer())
                .create();
    }

    /**
     * Reads a json element as a {@link CharSequence} by taking its string value.
     */
    public static class CharSequenceDeserializer implements JsonDeserializer<CharSequence> {

        @Override
        public CharSequence deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context)
                throws JsonParseException {
            return json.getAsString();
        }
    }
}

View File

@ -0,0 +1,77 @@
package eu.dnetlib.dhp.common.utils;
import java.lang.reflect.Field;
import org.apache.avro.Schema;
/**
*
* @author Mateusz Kobos
*
*/
public final class AvroUtils {

    /** Prefix marking a type name as the name of a {@link org.apache.avro.Schema.Type} constant. */
    public final static String primitiveTypePrefix = "org.apache.avro.Schema.Type.";

    //------------------------ CONSTRUCTORS -------------------

    private AvroUtils() {}

    //------------------------ LOGIC --------------------------

    /**
     * Returns the avro schema for a given type name.
     *
     * The name is either a name of a class generated from an Avro schema or a name of one of
     * the constants defined in {@link org.apache.avro.Schema.Type}; in the latter case the schema
     * of the corresponding primitive type is returned.
     *
     * @param typeName fully qualified name of a class generated from Avro schema,
     *      e.g. {@code eu.dnetlib.dhp.common.avro.Person},
     *      or a fully qualified name of enum constant defined by
     *      {@link org.apache.avro.Schema.Type},
     *      e.g. {@link org.apache.avro.Schema.Type#STRING}.
     * @return schema of the given type
     */
    public static Schema toSchema(String typeName) {
        if (typeName.startsWith(primitiveTypePrefix)) {
            String shortName = typeName.substring(primitiveTypePrefix.length());
            return getPrimitiveTypeSchema(shortName);
        }
        return getAvroClassSchema(typeName);
    }

    /**
     * Creates schema of the primitive type named like one of {@link org.apache.avro.Schema.Type} constants.
     */
    private static Schema getPrimitiveTypeSchema(String shortName) {
        return Schema.create(Schema.Type.valueOf(shortName));
    }

    /**
     * Reads the schema out of the static <code>SCHEMA$</code> field declared by the class of the given name.
     * Every reflection failure is rethrown as a {@link RuntimeException}.
     */
    private static Schema getAvroClassSchema(String className) {
        try {
            Field schemaField = Class.forName(className).getDeclaredField("SCHEMA$");
            // static field, hence no instance is needed
            return (Schema) schemaField.get(null);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(
                    "Class \""+className+"\" does not exist", e);
        } catch (SecurityException | NoSuchFieldException
                | IllegalArgumentException | IllegalAccessException e) {
            throw new RuntimeException(e);
        }
    }
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.common.utils;
/**
* Byte array utility class.
* @author mhorst
*
*/
public final class ByteArrayUtils {

    //------------------------ CONSTRUCTORS -------------------

    private ByteArrayUtils() {}

    //------------------------ LOGIC --------------------------

    /**
     * Does this byte array begin with match array content?
     * @param source Byte array to examine
     * @param match Byte array to locate in <code>source</code>
     * @return true If the starting bytes are equal
     */
    public static boolean startsWith(byte[] source, byte[] match) {
        return startsWith(source, 0, match);
    }

    /**
     * Does this byte array, read from the given offset, begin with match array content?
     * Mirrors {@link String#startsWith(String, int)}: an offset that is negative or greater
     * than the length of <code>source</code> yields false instead of an exception.
     *
     * @param source Byte array to examine
     * @param offset An offset into the <code>source</code> array
     * @param match Byte array to locate in <code>source</code>
     * @return true If the bytes of <code>source</code> starting at <code>offset</code> are equal
     *      to <code>match</code>; an empty <code>match</code> is found at every valid offset
     */
    public static boolean startsWith(byte[] source, int offset, byte[] match) {

        // guards the index arithmetic below against out-of-range and overflowing offsets
        if (offset < 0 || offset > source.length) {
            return false;
        }

        if (match.length > (source.length - offset)) {
            return false;
        }

        for (int i = 0; i < match.length; i++) {
            if (source[offset + i] != match[i]) {
                return false;
            }
        }
        return true;
    }
}

View File

@ -0,0 +1,89 @@
package eu.dnetlib.dhp.common.utils;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.InvalidParameterException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import eu.dnetlib.dhp.common.java.PortBindings;
import eu.dnetlib.dhp.common.java.Ports;
import eu.dnetlib.dhp.common.java.Process;
import eu.dnetlib.dhp.common.java.io.CloseableIterator;
import eu.dnetlib.dhp.common.java.io.DataStore;
import eu.dnetlib.dhp.common.java.io.FileSystemPath;
import eu.dnetlib.dhp.common.java.porttype.AnyPortType;
import eu.dnetlib.dhp.common.java.porttype.PortType;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME;
/**
* Simple process verifying whether given datastore is empty.
* @author mhorst
*
*/
public class EmptyDatastoreVerifierProcess implements Process {

    public static final String INPUT_PORT_NAME = "input";

    public static final String DEFAULT_ENCODING = "UTF-8";

    public static final String OUTPUT_PROPERTY_IS_EMPTY = "isEmpty";

    /**
     * Ports handled by this module: a single input port of any type and no output ports.
     */
    private final Ports ports;

    // ------------------------ CONSTRUCTORS --------------------------

    public EmptyDatastoreVerifierProcess() {
        Map<String, PortType> inputPorts = new HashMap<>();
        inputPorts.put(INPUT_PORT_NAME, new AnyPortType());
        Map<String, PortType> outputPorts = Collections.emptyMap();
        ports = new Ports(inputPorts, outputPorts);
    }

    @Override
    public Map<String, PortType> getInputPorts() {
        return ports.getInput();
    }

    @Override
    public Map<String, PortType> getOutputPorts() {
        return ports.getOutput();
    }

    /**
     * Checks whether the datastore bound to the input port holds any record and stores the outcome
     * as the {@value #OUTPUT_PROPERTY_IS_EMPTY} property in the file whose location is given by the
     * system property named by <code>OOZIE_ACTION_OUTPUT_FILENAME</code>.
     *
     * @throws InvalidParameterException when the input port is not bound
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {
        if (!portBindings.getInput().containsKey(INPUT_PORT_NAME)) {
            throw new InvalidParameterException("missing input port!");
        }

        try (CloseableIterator<?> records = getIterator(conf, portBindings.getInput().get(INPUT_PORT_NAME))) {
            File outputFile = new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME));

            // a single hasNext() call is enough, the records themselves are never read
            boolean empty = !records.hasNext();
            Properties outputProperties = new Properties();
            outputProperties.setProperty(OUTPUT_PROPERTY_IS_EMPTY, Boolean.toString(empty));

            try (OutputStream out = new FileOutputStream(outputFile)) {
                outputProperties.store(out, "");
            }
        }
    }

    /**
     * Returns iterator over datastore.
     */
    protected CloseableIterator<?> getIterator(Configuration conf, Path path) throws IOException {
        FileSystemPath location = new FileSystemPath(FileSystem.get(conf), path);
        return DataStore.getReader(location);
    }
}

3
dhp-schemas/README.md Normal file
View File

@ -0,0 +1,3 @@
Description of the project
--------------------------
This project defines **serialization schemas** of Avro data store files that are used to pass data between workflow nodes in the system.

62
dhp-schemas/pom.xml Normal file
View File

@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>dhp-schemas</artifactId>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Plugin that generates Java classes from Avro schemas -->
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>schema</goal>
<goal>idl-protocol</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Registers "target/generated-sources/avro" as an additional source directory, so that
the "Maven -> Update Project Configuration" action does not drop it from the source path -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-source</id>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>${project.build.directory}/generated-sources/avro/</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,29 @@
@namespace("eu.dnetlib.dhp.audit.schemas")
protocol DHP {
// Audit records: a Fault describes a single failure, each Cause one element of its cause chain.
// one entry of the cause chain of a fault
record Cause {
// generic cause code, root exception class name when derived from exception
string code;
// cause message
union { null , string } message = null;
}
// a failure reported for a single input object
record Fault {
// input object identifier
string inputObjectId;
// fault creation timestamp
// NOTE(review): unit is not stated here, presumably milliseconds since epoch; confirm with the producers
long timestamp;
// generic fault code, root exception class name when derived from exception
string code;
// fault message
union { null , string } message = null;
// stack trace
union { null , string } stackTrace = null;
// fault causes, array is indexed with cause depth
union { null , array<Cause> } causes = null;
// Other supplementary data related to specific type of fault.
// See parameters description in oozie workflow.xml documentation of modules
// that use this structure for information what exactly can be stored as supplementary data.
union { null , map<string> } supplementaryData = null;
}
}

View File

@ -0,0 +1,16 @@
@namespace("eu.dnetlib.dhp.common.schemas")
protocol DHP{
// kind of a report entry
// NOTE(review): presumably tells how ReportEntry.value is to be read (a count vs. an elapsed time); confirm with the report producers
enum ReportEntryType {
COUNTER, DURATION
}
// single key-value entry of a report, the value is always carried as a string
record ReportEntry {
// entry key
string key;
// entry type
ReportEntryType type;
// entry value in its textual form
string value;
}
}

View File

@ -0,0 +1,21 @@
@namespace("eu.dnetlib.dhp.importer.schemas")
protocol DHP {
// serialization format of the record body
enum RecordFormat {
XML, JSON
}
// a single imported record together with the information needed to pick a parser for its body
record ImportedRecord {
// record identifier
string id;
// serialization format of the body
RecordFormat format;
// format name (OAF, OAI_DC, Datacite, etc) for which there is a parser implementation
string formatName;
// record body
string body;
}
}

View File

@ -0,0 +1,105 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-wf</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>dhp-wf-import</artifactId>
<dependencies>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>dhp-common</artifactId>
<version>${project.version}</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>dhp-schemas</artifactId>
<version>${project.version}</version>
</dependency>
<!-- required after introducing 'provided' scope for hadoop libs -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-objectstore-rmi</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>cnr-rmi-api</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>cnr-resultset-client</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
</dependency>
<!-- proper spring context version required by cnr-resultset-client -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</dependency>
<dependency>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-frontend-jaxws</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
</dependency>
<dependency>
<groupId>com.databricks</groupId>
<artifactId>spark-avro_2.10</artifactId>
</dependency>
<dependency>
<groupId>org.mongodb.spark</groupId>
<artifactId>mongo-spark-connector_2.10</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,29 @@
package eu.dnetlib.dhp.wf.importer;
import java.io.IOException;
import org.apache.avro.file.DataFileWriter;
/**
 * {@link RecordReceiver} appending every received record to a {@link DataFileWriter}.
 * @author mhorst
 *
 */
public class DataFileRecordReceiver<T> implements RecordReceiver<T> {

    private final DataFileWriter<T> writer;

    /**
     * @param writer writer the received records are appended to
     */
    public DataFileRecordReceiver(DataFileWriter<T> writer) {
        this.writer = writer;
    }

    /**
     * Appends the given object to the underlying writer.
     */
    @Override
    public void receive(T object) throws IOException {
        writer.append(object);
    }
}

View File

@ -0,0 +1,50 @@
package eu.dnetlib.dhp.wf.importer;
import java.io.IOException;
import org.apache.avro.file.DataFileWriter;
/**
 * {@link DataFileWriter} based record receiver that additionally keeps track of
 * the number of records it has received.
 *
 * @author madryk
 */
public class DataFileRecordReceiverWithCounter<T> extends DataFileRecordReceiver<T> {

    private long receivedCount = 0L;

    //------------------------ CONSTRUCTORS --------------------------

    /**
     * @param writer - writer of the received records
     */
    public DataFileRecordReceiverWithCounter(DataFileWriter<T> writer) {
        super(writer);
    }

    //------------------------ GETTERS --------------------------

    /**
     * Returns number of records received so far
     */
    public long getReceivedCount() {
        return receivedCount;
    }

    //------------------------ LOGIC --------------------------

    /**
     * Passes the record to the writer and, once it has been accepted, increments the counter.
     * The counter stays untouched when writing fails.
     */
    @Override
    public void receive(T record) throws IOException {
        super.receive(record);
        receivedCount++;
    }
}

View File

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.wf.importer;
/**
* Import related workflow parameters: names of the parameters and their default values.
* @author mhorst
*
*/
public final class ImportWorkflowRuntimeParameters {
// parameter names
public static final String IMPORT_INFERENCE_PROVENANCE_BLACKLIST = "import.inference.provenance.blacklist";
public static final String IMPORT_SKIP_DELETED_BY_INFERENCE = "import.skip.deleted.by.inference";
public static final String IMPORT_TRUST_LEVEL_THRESHOLD = "import.trust.level.threshold";
public static final String IMPORT_APPROVED_DATASOURCES_CSV = "import.approved.datasources.csv";
public static final String IMPORT_APPROVED_COLUMNFAMILIES_CSV = "import.approved.columnfamilies.csv";
public static final String IMPORT_MERGE_BODY_WITH_UPDATES = "import.merge.body.with.updates";
// NOTE: "OBJECSTORES" in the two constant names below is a misspelling of "OBJECTSTORES";
// the names are public, so they are left as they are. The parameter values are spelled correctly.
public static final String IMPORT_CONTENT_APPROVED_OBJECSTORES_CSV = "import.content.approved.objectstores.csv";
public static final String IMPORT_CONTENT_BLACKLISTED_OBJECSTORES_CSV = "import.content.blacklisted.objectstores.csv";
public static final String IMPORT_CONTENT_OBJECT_STORE_LOC = "import.content.object.store.location";
public static final String IMPORT_CONTENT_OBJECT_STORE_IDS_CSV = "import.content.object.store.ids.csv";
public static final String IMPORT_CONTENT_MAX_FILE_SIZE_MB = "import.content.max.file.size.mb";
public static final String IMPORT_CONTENT_CONNECTION_TIMEOUT = "import.content.connection.timeout";
public static final String IMPORT_CONTENT_READ_TIMEOUT = "import.content.read.timeout";
public static final String IMPORT_MDSTORE_IDS_CSV = "import.mdstore.ids.csv";
public static final String IMPORT_MDSTORE_SERVICE_LOCATION = "import.mdstore.service.location";
public static final String IMPORT_MDSTORE_RECORD_MAXLENGTH = "import.mdstore.record.maxlength";
public static final String IMPORT_ISLOOKUP_SERVICE_LOCATION = "import.islookup.service.location";
public static final String IMPORT_VOCABULARY_CODE = "import.vocabulary.code";
public static final String IMPORT_VOCABULARY_OUTPUT_FILENAME = "import.vocabulary.output.filename";
public static final String IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT = "import.resultset.client.read.timeout";
public static final String IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT = "import.resultset.client.connection.timeout";
public static final String IMPORT_RESULT_SET_PAGESIZE = "import.resultset.pagesize";
public static final String HBASE_ENCODING = "hbase.table.encoding";
// name of the parameter holding the class name of the service facade factory
public static final String IMPORT_FACADE_FACTORY_CLASS = "import.facade.factory.classname";
// default values
// NOTE(review): the two timeouts below are presumably expressed in milliseconds; confirm with the code consuming them
public static final String RESULTSET_READ_TIMEOUT_DEFAULT_VALUE = "60000";
public static final String RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE = "60000";
public static final String RESULTSET_PAGESIZE_DEFAULT_VALUE = "100";
// constants holder, not meant to be instantiated
private ImportWorkflowRuntimeParameters() {}
}

View File

@ -0,0 +1,14 @@
package eu.dnetlib.dhp.wf.importer;
import java.io.IOException;
/**
* Record receiver interface.
* @author mhorst
*
* @param <T> type of the received records
*/
public interface RecordReceiver<T> {
/**
* Receives a single record.
*
* @param object record to be received
* @throws IOException when the record could not be handled
*/
void receive(T object) throws IOException;
}

View File

@ -0,0 +1,104 @@
package eu.dnetlib.dhp.wf.importer.facade;
import java.util.Map;
import javax.xml.ws.BindingProvider;
import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder;
import org.apache.log4j.Logger;
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl;
/**
* Abstract class utilized by all WebService facades.
* @author mhorst
*
*/
public abstract class AbstractResultSetAwareWebServiceFacade<T> {

    private final Logger log = Logger.getLogger(this.getClass());

    /**
     * Web service.
     */
    private final T service;

    /**
     * ResultSet read timeout.
     */
    private final long resultSetReadTimeout;

    /**
     * ResultSet connection timeout.
     */
    private final long resultSetConnectionTimeout;

    /**
     * ResultSet page size.
     */
    private final int resultSetPageSize;

    //------------------------ CONSTRUCTORS -------------------

    /**
     * Instantiates underlying service and, when the service is a {@link BindingProvider},
     * sets the read and connection timeouts in its request context.
     *
     * @param clazz webservice class
     * @param serviceLocation webservice location
     * @param serviceReadTimeout service read timeout
     * @param serviceConnectionTimeout service connection timeout
     * @param resultSetReadTimeout resultset read timeout
     * @param resultSetConnectionTimeout resultset connection timeout
     * @param resultSetPageSize resultset page size
     */
    protected AbstractResultSetAwareWebServiceFacade(Class<T> clazz, String serviceLocation,
            long serviceReadTimeout, long serviceConnectionTimeout,
            long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
        W3CEndpointReferenceBuilder eprBuilder = new W3CEndpointReferenceBuilder();
        eprBuilder.address(serviceLocation);
        // the endpoint reference is built once, right where it is needed
        this.service = new JaxwsServiceResolverImpl().getService(clazz, eprBuilder.build());

        if (this.service instanceof BindingProvider) {
            // names the service being configured rather than the BindingProvider interface itself
            log.info(String.format("setting timeouts for %s: read timeout (%s) and connect timeout (%s)",
                    clazz.getName(), serviceReadTimeout, serviceConnectionTimeout));
            final Map<String, Object> requestContext = ((BindingProvider) service).getRequestContext();
            // can't be sure about which will be used. Set them all.
            // NOTE(review): the values are stored as Long; confirm that the JAX-WS runtime in use
            // accepts Long for these properties
            requestContext.put("com.sun.xml.internal.ws.request.timeout", serviceReadTimeout);
            requestContext.put("com.sun.xml.internal.ws.connect.timeout", serviceConnectionTimeout);
            requestContext.put("com.sun.xml.ws.request.timeout", serviceReadTimeout);
            requestContext.put("com.sun.xml.ws.connect.timeout", serviceConnectionTimeout);
            requestContext.put("javax.xml.ws.client.receiveTimeout", serviceReadTimeout);
            requestContext.put("javax.xml.ws.client.connectionTimeout", serviceConnectionTimeout);
        }

        this.resultSetReadTimeout = resultSetReadTimeout;
        this.resultSetConnectionTimeout = resultSetConnectionTimeout;
        this.resultSetPageSize = resultSetPageSize;
    }

    //------------------------ GETTERS -------------------------

    public T getService() {
        return service;
    }

    public long getResultSetReadTimeout() {
        return resultSetReadTimeout;
    }

    public long getResultSetConnectionTimeout() {
        return resultSetConnectionTimeout;
    }

    public int getResultSetPageSize() {
        return resultSetPageSize;
    }
}

View File

@ -0,0 +1,17 @@
package eu.dnetlib.dhp.wf.importer.facade;
/**
* ISLookup service facade.
*
* @author mhorst
*
*/
public interface ISLookupFacade {
/**
* Provides all profiles matching given query
* @param xPathQuery XPath query
* @return matching profiles, one string per profile
* @throws ServiceFacadeException when the profiles could not be obtained
*/
Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException;
}

View File

@ -0,0 +1,17 @@
package eu.dnetlib.dhp.wf.importer.facade;
/**
* MDStore service facade.
*
* @author mhorst
*
*/
public interface MDStoreFacade {
/**
* Delivers all records for given MDStore identifier
* @param mdStoreId MDStore identifier
* @return records of the given MDStore, one string per record
* @throws ServiceFacadeException when the records could not be delivered
*/
Iterable<String> deliverMDRecords(String mdStoreId) throws ServiceFacadeException;
}

View File

@ -0,0 +1,19 @@
package eu.dnetlib.dhp.wf.importer.facade;
/**
* ObjectStore service facade.
*
* @author mhorst
*
*/
public interface ObjectStoreFacade {
/**
* Returns metadata records from given objectstore created in specified time range.
* @param objectStoreId object store identifier
* @param from from time in millis
* @param until until time in millis
* @return metadata records, one string per record
* @throws ServiceFacadeException when the records could not be delivered
*/
Iterable<String> deliverObjects(String objectStoreId, long from, long until) throws ServiceFacadeException;
}

View File

@ -0,0 +1,27 @@
package eu.dnetlib.dhp.wf.importer.facade;
/**
* Service facade generic exception. It is a checked exception declared by the service facade methods.
*
* @author mhorst
*
*/
public class ServiceFacadeException extends Exception {
private static final long serialVersionUID = 0L;
//------------------------ CONSTRUCTORS -------------------
/**
* @param message description of the failure
* @param cause underlying exception
*/
public ServiceFacadeException(String message, Throwable cause) {
super(message, cause);
}
/**
* @param message description of the failure
*/
public ServiceFacadeException(String message) {
super(message);
}
/**
* @param cause underlying exception
*/
public ServiceFacadeException(Throwable cause) {
super(cause);
}
}

View File

@ -0,0 +1,20 @@
package eu.dnetlib.dhp.wf.importer.facade;
import java.util.Map;
/**
* Generic service facade factory. All implementations must be instantiable with no-argument constructor.
*
* @author mhorst
*
* @param <T> type of the service created by the factory
*/
public interface ServiceFacadeFactory<T> {
/**
* Creates service of given type configured with parameters.
*
* @param parameters service configuration
* @return service instance
*
*/
T instantiate(Map<String, String> parameters);
}

View File

@ -0,0 +1,80 @@
package eu.dnetlib.dhp.wf.importer.facade;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_FACADE_FACTORY_CLASS;
import java.lang.reflect.Constructor;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import com.google.common.collect.ImmutableMap;
import eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters;
/**
* Service facade utility methods.
* @author mhorst
*
*/
public final class ServiceFacadeUtils {

    //------------------------ CONSTRUCTORS -------------------

    private ServiceFacadeUtils() {}

    //------------------------ LOGIC --------------------------

    /**
     * Instantiates service based on provided parameters.
     *
     * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} parameter.
     * Other parameters will be used by factory itself. Factory must be instantiable with no-argument constructor.
     *
     * @param parameters set of parameters required for service instantiation
     * @return service created by the factory
     * @throws ServiceFacadeException when the factory class name is missing or blank, or when
     *      either the factory or the service could not be instantiated
     */
    public static <T> T instantiate(Map<String, String> parameters) throws ServiceFacadeException {
        String serviceFactoryClassName = parameters.get(IMPORT_FACADE_FACTORY_CLASS);
        if (StringUtils.isBlank(serviceFactoryClassName)) {
            throw new ServiceFacadeException("unknown service facade factory, no " + IMPORT_FACADE_FACTORY_CLASS + " parameter provided!");
        }
        try {
            Class<?> clazz = Class.forName(serviceFactoryClassName);
            Constructor<?> constructor = clazz.getConstructor();
            @SuppressWarnings("unchecked")
            ServiceFacadeFactory<T> serviceFactory = (ServiceFacadeFactory<T>) constructor.newInstance();
            return serviceFactory.instantiate(parameters);
        } catch (Exception e) {
            // broad on purpose: reflection failures and failures of the factory itself are reported alike;
            // the message names the class that was being instantiated, not the name of the parameter
            throw new ServiceFacadeException("exception occurred while instantiating service by facade factory: " + serviceFactoryClassName, e);
        }
    }

    /**
     * Instantiates service based on provided configuration.
     *
     * Service factory class name is mandatory and has to be provided as {@value ImportWorkflowRuntimeParameters#IMPORT_FACADE_FACTORY_CLASS} configuration entry.
     * Other parameters will be used by factory itself. Factory must be instantiable with no-argument constructor.
     *
     * @param config set of configuration entries required for service instantiation
     * @return service created by the factory
     * @throws ServiceFacadeException when the service could not be instantiated
     */
    public static <T> T instantiate(Configuration config) throws ServiceFacadeException {
        return instantiate(buildParameters(config));
    }

    // ------------------------ PRIVATE --------------------------

    /**
     * Converts configuration entries into plain map.
     */
    private static Map<String, String> buildParameters(Configuration config) {
        ImmutableMap.Builder<String, String> builder = ImmutableMap.builder();
        for (Map.Entry<String, String> entry : config) {
            builder.put(entry);
        }
        return builder.build();
    }
}

View File

@ -0,0 +1,55 @@
package eu.dnetlib.dhp.wf.importer.facade;
import java.util.Collections;
import org.apache.log4j.Logger;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
/**
 * {@link ISLookupFacade} implementation which forwards profile searches to the
 * {@link ISLookUpService} web service.
 *
 * @author mhorst
 *
 */
public class WebServiceISLookupFacade extends AbstractResultSetAwareWebServiceFacade<ISLookUpService> implements ISLookupFacade {

    private static final Logger log = Logger.getLogger(WebServiceISLookupFacade.class);

    //------------------------ CONSTRUCTORS -------------------

    /**
     * Passes all settings, together with the {@link ISLookUpService} type, to the parent facade.
     *
     * @param serviceLocation database service location
     * @param serviceReadTimeout service read timeout
     * @param serviceConnectionTimeout service connection timeout
     * @param resultSetReadTimeout result set providing database results read timeout
     * @param resultSetConnectionTimeout result set connection timeout
     * @param resultSetPageSize result set data chunk size
     */
    public WebServiceISLookupFacade(String serviceLocation,
            long serviceReadTimeout, long serviceConnectionTimeout,
            long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
        super(ISLookUpService.class,
                serviceLocation,
                serviceReadTimeout,
                serviceConnectionTimeout,
                resultSetReadTimeout,
                resultSetConnectionTimeout,
                resultSetPageSize);
    }

    //------------------------ LOGIC --------------------------

    /**
     * Runs the given query through {@code quickSearchProfile} of the underlying service.
     *
     * @param xPathQuery query forwarded to the service
     * @return profiles returned by the service, or an empty list when the service reports
     *         that no document was found (the condition is logged as an error)
     * @throws ServiceFacadeException when the lookup fails for any other ISLookup reason
     */
    @Override
    public Iterable<String> searchProfile(String xPathQuery) throws ServiceFacadeException {
        Iterable<String> profiles;
        try {
            profiles = getService().quickSearchProfile(xPathQuery);
        } catch (ISLookUpDocumentNotFoundException notFound) {
            // a missing document is not propagated: it is logged and reported as "no results"
            log.error("unable to find profile for query: " + xPathQuery, notFound);
            profiles = Collections.emptyList();
        } catch (ISLookUpException lookupFailure) {
            throw new ServiceFacadeException("searching profiles in ISLookup failed with query '" + xPathQuery + "'", lookupFailure);
        }
        return profiles;
    }
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.dhp.wf.importer.facade;
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT;
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CLIENT_READ_TIMEOUT;
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE;
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_ISLOOKUP_SERVICE_LOCATION;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.IMPORT_RESULT_SET_PAGESIZE;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_PAGESIZE_DEFAULT_VALUE;
import static eu.dnetlib.dhp.wf.importer.ImportWorkflowRuntimeParameters.RESULTSET_READ_TIMEOUT_DEFAULT_VALUE;
import java.util.Map;
import com.google.common.base.Preconditions;
import eu.dnetlib.dhp.common.WorkflowRuntimeParameters;
/**
 * Factory producing {@link WebServiceISLookupFacade} instances from a map of parameters.
 *
 * @author mhorst
 *
 */
public class WebServiceISLookupFacadeFactory implements ServiceFacadeFactory<ISLookupFacade> {

    //------------------------ LOGIC --------------------------

    /**
     * Builds the facade out of the supplied parameters.
     *
     * The service location entry must be present in the map. Timeouts and page size are resolved
     * through {@link WorkflowRuntimeParameters#getParamValue} using the matching
     * {@code *_DEFAULT_VALUE} constants and then parsed as numbers.
     *
     * @param parameters parameters holding service location and optional timeout/page size settings
     * @return facade configured with the resolved values
     * @throws IllegalArgumentException when the service location key is not present in the map
     * @throws NumberFormatException when a resolved timeout or page size value is not a valid number
     */
    @Override
    public ISLookupFacade instantiate(Map<String, String> parameters) {
        Preconditions.checkArgument(parameters.containsKey(IMPORT_ISLOOKUP_SERVICE_LOCATION),
                "unknown ISLookup service location: no parameter provided: '%s'", IMPORT_ISLOOKUP_SERVICE_LOCATION);

        // values are resolved in constructor-argument order
        String serviceLocation = parameters.get(IMPORT_ISLOOKUP_SERVICE_LOCATION);
        long serviceReadTimeout = Long.parseLong(
                WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_READ_TIMEOUT, DNET_SERVICE_READ_TIMEOUT_DEFAULT_VALUE, parameters));
        long serviceConnectionTimeout = Long.parseLong(
                WorkflowRuntimeParameters.getParamValue(DNET_SERVICE_CLIENT_CONNECTION_TIMEOUT, DNET_SERVICE_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters));
        long resultSetReadTimeout = Long.parseLong(
                WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_READ_TIMEOUT, RESULTSET_READ_TIMEOUT_DEFAULT_VALUE, parameters));
        long resultSetConnectionTimeout = Long.parseLong(
                WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_CLIENT_CONNECTION_TIMEOUT, RESULTSET_CONNECTION_TIMEOUT_DEFAULT_VALUE, parameters));
        int resultSetPageSize = Integer.parseInt(
                WorkflowRuntimeParameters.getParamValue(IMPORT_RESULT_SET_PAGESIZE, RESULTSET_PAGESIZE_DEFAULT_VALUE, parameters));

        return new WebServiceISLookupFacade(serviceLocation,
                serviceReadTimeout, serviceConnectionTimeout,
                resultSetReadTimeout, resultSetConnectionTimeout, resultSetPageSize);
    }
}

View File

@ -0,0 +1,52 @@
package eu.dnetlib.dhp.wf.importer.facade;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import eu.dnetlib.data.mdstore.MDStoreService;
import eu.dnetlib.data.mdstore.MDStoreServiceException;
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
import eu.dnetlib.enabling.tools.JaxwsServiceResolverImpl;
/**
 * {@link MDStoreFacade} implementation which obtains records from the {@link MDStoreService}
 * web service and exposes them through a result set client.
 *
 * @author mhorst
 *
 */
public class WebServiceMDStoreFacade extends AbstractResultSetAwareWebServiceFacade<MDStoreService> implements MDStoreFacade {

    //------------------------ CONSTRUCTORS -------------------

    /**
     * Passes all settings, together with the {@link MDStoreService} type, to the parent facade.
     *
     * @param serviceLocation MDStore webservice location
     * @param serviceReadTimeout service read timeout
     * @param serviceConnectionTimeout service connection timeout
     * @param resultSetReadTimeout resultset read timeout
     * @param resultSetConnectionTimeout result set connection timeout
     * @param resultSetPageSize resultset page size
     */
    public WebServiceMDStoreFacade(String serviceLocation,
            long serviceReadTimeout, long serviceConnectionTimeout,
            long resultSetReadTimeout, long resultSetConnectionTimeout, int resultSetPageSize) {
        super(MDStoreService.class,
                serviceLocation,
                serviceReadTimeout,
                serviceConnectionTimeout,
                resultSetReadTimeout,
                resultSetConnectionTimeout,
                resultSetPageSize);
    }

    //------------------------ LOGIC --------------------------

    /**
     * Requests records of the given store from the service and returns a result set client
     * created for the endpoint reference the service answered with.
     *
     * @param mdStoreId identifier of the store whose records should be delivered
     * @return client returned by the result set factory for the delivered endpoint reference
     * @throws ServiceFacadeException when the MDStore service reports a failure
     */
    @Override
    public Iterable<String> deliverMDRecords(String mdStoreId) throws ServiceFacadeException {
        try {
            // the three trailing arguments of the service call are left unset (null)
            W3CEndpointReference recordsReference = getService().deliverMDRecords(mdStoreId, null, null, null);

            ResultSetClientFactory clientFactory = new ResultSetClientFactory(
                    getResultSetPageSize(), getResultSetReadTimeout(), getResultSetConnectionTimeout());
            JaxwsServiceResolverImpl serviceResolver = new JaxwsServiceResolverImpl();
            clientFactory.setServiceResolver(serviceResolver);

            return clientFactory.getClient(recordsReference);
        } catch (MDStoreServiceException serviceFailure) {
            throw new ServiceFacadeException("delivering records for md store " + mdStoreId + " failed!", serviceFailure);
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More