1
0
Fork 0

merged master

This commit is contained in:
Sandro La Bruzzo 2020-04-20 14:04:40 +02:00
commit b2c872cb4d
241 changed files with 15238 additions and 10022 deletions

View File

@ -3,7 +3,6 @@ package eu.dnetlib.maven.plugin.properties;
import java.io.File; import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.AbstractMojo;
@ -12,60 +11,63 @@ import org.apache.maven.plugin.MojoFailureException;
/** /**
* Generates oozie properties which were not provided from commandline. * Generates oozie properties which were not provided from commandline.
* @author mhorst
* *
* @author mhorst
* @goal generate-properties * @goal generate-properties
*/ */
public class GenerateOoziePropertiesMojo extends AbstractMojo { public class GenerateOoziePropertiesMojo extends AbstractMojo {
public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir"; public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir";
public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName"; public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName";
private final String[] limiters = {"dhp", "dnetlib", "eu"}; private final String[] limiters = {"dhp", "dnetlib", "eu"};
@Override @Override
public void execute() throws MojoExecutionException, MojoFailureException { public void execute() throws MojoExecutionException, MojoFailureException {
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) && if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
!System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName = generateSandboxName(System.getProperties().getProperty( String generatedSandboxName =
PROPERTY_NAME_WF_SOURCE_DIR)); generateSandboxName(
if (generatedSandboxName!=null) { System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME, if (generatedSandboxName != null) {
generatedSandboxName); System.getProperties()
} else { .setProperty(PROPERTY_NAME_SANDBOX_NAME, generatedSandboxName);
System.out.println("unable to generate sandbox name from path: " + } else {
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR)); System.out.println(
} "unable to generate sandbox name from path: "
} + System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
}
}
} }
/** /**
* Generates sandbox name from workflow source directory. * Generates sandbox name from workflow source directory.
*
* @param wfSourceDir * @param wfSourceDir
* @return generated sandbox name * @return generated sandbox name
*/ */
private String generateSandboxName(String wfSourceDir) { private String generateSandboxName(String wfSourceDir) {
// utilize all dir names until finding one of the limiters // utilize all dir names until finding one of the limiters
List<String> sandboxNameParts = new ArrayList<String>(); List<String> sandboxNameParts = new ArrayList<String>();
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
ArrayUtils.reverse(tokens); ArrayUtils.reverse(tokens);
if (tokens.length>0) { if (tokens.length > 0) {
for (String token : tokens) { for (String token : tokens) {
for (String limiter : limiters) { for (String limiter : limiters) {
if (limiter.equals(token)) { if (limiter.equals(token)) {
return sandboxNameParts.size()>0? return sandboxNameParts.size() > 0
StringUtils.join(sandboxNameParts.toArray()):null; ? StringUtils.join(sandboxNameParts.toArray())
} : null;
} }
if (sandboxNameParts.size()>0) { }
sandboxNameParts.add(0, File.separator); if (sandboxNameParts.size() > 0) {
} sandboxNameParts.add(0, File.separator);
sandboxNameParts.add(0, token); }
} sandboxNameParts.add(0, token);
return StringUtils.join(sandboxNameParts.toArray()); }
} else { return StringUtils.join(sandboxNameParts.toArray());
return null; } else {
} return null;
}
} }
} }

View File

@ -1,19 +1,17 @@
/** /**
* Licensed under the Educational Community License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the License at
* *
* Licensed under the Educational Community License, Version 2.0 (the "License"); * <p>http://www.opensource.org/licenses/ecl2.php
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* *
* http://www.opensource.org/licenses/ecl2.php * <p>Unless required by applicable law or agreed to in writing, software distributed under the
* * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* Unless required by applicable law or agreed to in writing, software * express or implied. See the License for the specific language governing permissions and
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package eu.dnetlib.maven.plugin.properties; package eu.dnetlib.maven.plugin.properties;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
@ -26,7 +24,6 @@ import java.util.List;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
@ -38,28 +35,23 @@ import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource; import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader; import org.springframework.core.io.ResourceLoader;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/** /**
* Writes project properties for the keys listed in specified properties files. * Writes project properties for the keys listed in specified properties files. Based on:
* Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html * http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
*
* @author mhorst * @author mhorst
* @goal write-project-properties * @goal write-project-properties
*/ */
public class WritePredefinedProjectProperties extends AbstractMojo { public class WritePredefinedProjectProperties extends AbstractMojo {
private static final String CR = "\r"; private static final String CR = "\r";
private static final String LF = "\n"; private static final String LF = "\n";
private static final String TAB = "\t"; private static final String TAB = "\t";
protected static final String PROPERTY_PREFIX_ENV = "env."; protected static final String PROPERTY_PREFIX_ENV = "env.";
private static final String ENCODING_UTF8 = "utf8"; private static final String ENCODING_UTF8 = "utf8";
/** /** @parameter property="properties.includePropertyKeysFromFiles" */
* @parameter property="properties.includePropertyKeysFromFiles" private String[] includePropertyKeysFromFiles;
*/
private String[] includePropertyKeysFromFiles;
/** /**
* @parameter default-value="${project}" * @parameter default-value="${project}"
@ -72,37 +64,39 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* The file that properties will be written to * The file that properties will be written to
* *
* @parameter property="properties.outputFile" * @parameter property="properties.outputFile"
* default-value="${project.build.directory}/properties/project.properties"; * default-value="${project.build.directory}/properties/project.properties";
* @required * @required
*/ */
protected File outputFile; protected File outputFile;
/** /**
* If true, the plugin will silently ignore any non-existent properties files, and the build will continue * If true, the plugin will silently ignore any non-existent properties files, and the build
* will continue
* *
* @parameter property="properties.quiet" default-value="true" * @parameter property="properties.quiet" default-value="true"
*/ */
private boolean quiet; private boolean quiet;
/** /**
* Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed, * Comma separated list of characters to escape when writing property values. cr=carriage
* tab=tab. Any other values are taken literally. * return, lf=linefeed, tab=tab. Any other values are taken literally.
* *
* @parameter default-value="cr,lf,tab" property="properties.escapeChars" * @parameter default-value="cr,lf,tab" property="properties.escapeChars"
*/ */
private String escapeChars; private String escapeChars;
/** /**
* If true, the plugin will include system properties when writing the properties file. System properties override * If true, the plugin will include system properties when writing the properties file. System
* both environment variables and project properties. * properties override both environment variables and project properties.
* *
* @parameter default-value="false" property="properties.includeSystemProperties" * @parameter default-value="false" property="properties.includeSystemProperties"
*/ */
private boolean includeSystemProperties; private boolean includeSystemProperties;
/** /**
* If true, the plugin will include environment variables when writing the properties file. Environment variables * If true, the plugin will include environment variables when writing the properties file.
* are prefixed with "env". Environment variables override project properties. * Environment variables are prefixed with "env". Environment variables override project
* properties.
* *
* @parameter default-value="false" property="properties.includeEnvironmentVariables" * @parameter default-value="false" property="properties.includeEnvironmentVariables"
*/ */
@ -116,8 +110,8 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
private String exclude; private String exclude;
/** /**
* Comma separated set of properties to write to the properties file. If provided, only the properties matching * Comma separated set of properties to write to the properties file. If provided, only the
* those supplied here will be written to the properties file. * properties matching those supplied here will be written to the properties file.
* *
* @parameter property="properties.include" * @parameter property="properties.include"
*/ */
@ -127,7 +121,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* @see org.apache.maven.plugin.AbstractMojo#execute() * @see org.apache.maven.plugin.AbstractMojo#execute()
*/ */
@Override @Override
@SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"}) @SuppressFBWarnings({"NP_UNWRITTEN_FIELD", "UWF_UNWRITTEN_FIELD"})
public void execute() throws MojoExecutionException, MojoFailureException { public void execute() throws MojoExecutionException, MojoFailureException {
Properties properties = new Properties(); Properties properties = new Properties();
// Add project properties // Add project properties
@ -149,10 +143,11 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
getLog().info("Creating " + outputFile); getLog().info("Creating " + outputFile);
writeProperties(outputFile, comment, properties, escapeTokens); writeProperties(outputFile, comment, properties, escapeTokens);
} }
/** /**
* Provides environment variables. * Provides environment variables.
*
* @return environment variables * @return environment variables
*/ */
protected static Properties getEnvironmentVariables() { protected static Properties getEnvironmentVariables() {
@ -165,32 +160,34 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Removes properties which should not be written. * Removes properties which should not be written.
*
* @param properties * @param properties
* @param omitCSV * @param omitCSV
* @param includeCSV * @param includeCSV
* @throws MojoExecutionException * @throws MojoExecutionException
*/ */
protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException { protected void trim(Properties properties, String omitCSV, String includeCSV)
throws MojoExecutionException {
List<String> omitKeys = getListFromCSV(omitCSV); List<String> omitKeys = getListFromCSV(omitCSV);
for (String key : omitKeys) { for (String key : omitKeys) {
properties.remove(key); properties.remove(key);
} }
List<String> includeKeys = getListFromCSV(includeCSV); List<String> includeKeys = getListFromCSV(includeCSV);
// mh: including keys from predefined properties // mh: including keys from predefined properties
if (includePropertyKeysFromFiles!=null && includePropertyKeysFromFiles.length>0) { if (includePropertyKeysFromFiles != null && includePropertyKeysFromFiles.length > 0) {
for (String currentIncludeLoc : includePropertyKeysFromFiles) { for (String currentIncludeLoc : includePropertyKeysFromFiles) {
if (validate(currentIncludeLoc)) { if (validate(currentIncludeLoc)) {
Properties p = getProperties(currentIncludeLoc); Properties p = getProperties(currentIncludeLoc);
for (String key : p.stringPropertyNames()) { for (String key : p.stringPropertyNames()) {
includeKeys.add(key); includeKeys.add(key);
} }
} }
} }
} }
if (includeKeys!=null && !includeKeys.isEmpty()) { if (includeKeys != null && !includeKeys.isEmpty()) {
// removing only when include keys provided // removing only when include keys provided
Set<String> keys = properties.stringPropertyNames(); Set<String> keys = properties.stringPropertyNames();
for (String key : keys) { for (String key : keys) {
if (!includeKeys.contains(key)) { if (!includeKeys.contains(key)) {
properties.remove(key); properties.remove(key);
@ -201,6 +198,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Checks whether file exists. * Checks whether file exists.
*
* @param location * @param location
* @return true when exists, false otherwise. * @return true when exists, false otherwise.
*/ */
@ -219,6 +217,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Validates resource location. * Validates resource location.
*
* @param location * @param location
* @return true when valid, false otherwise * @return true when valid, false otherwise
* @throws MojoExecutionException * @throws MojoExecutionException
@ -238,6 +237,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Provides input stream. * Provides input stream.
*
* @param location * @param location
* @return input stream * @return input stream
* @throws IOException * @throws IOException
@ -254,6 +254,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Creates properties for given location. * Creates properties for given location.
*
* @param location * @param location
* @return properties for given location * @return properties for given location
* @throws MojoExecutionException * @throws MojoExecutionException
@ -278,6 +279,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Provides escape characters. * Provides escape characters.
*
* @param escapeChars * @param escapeChars
* @return escape characters * @return escape characters
*/ */
@ -293,6 +295,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Provides real token. * Provides real token.
*
* @param token * @param token
* @return real token * @return real token
*/ */
@ -310,6 +313,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Returns content. * Returns content.
*
* @param comment * @param comment
* @param properties * @param properties
* @param escapeTokens * @param escapeTokens
@ -332,13 +336,15 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Writes properties to given file. * Writes properties to given file.
*
* @param file * @param file
* @param comment * @param comment
* @param properties * @param properties
* @param escapeTokens * @param escapeTokens
* @throws MojoExecutionException * @throws MojoExecutionException
*/ */
protected void writeProperties(File file, String comment, Properties properties, List<String> escapeTokens) protected void writeProperties(
File file, String comment, Properties properties, List<String> escapeTokens)
throws MojoExecutionException { throws MojoExecutionException {
try { try {
String content = getContent(comment, properties, escapeTokens); String content = getContent(comment, properties, escapeTokens);
@ -350,12 +356,13 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Escapes characters. * Escapes characters.
*
* @param s * @param s
* @param escapeChars * @param escapeChars
* @return * @return
*/ */
protected String escape(String s, List<String> escapeChars) { protected String escape(String s, List<String> escapeChars) {
String result = s; String result = s;
for (String escapeChar : escapeChars) { for (String escapeChar : escapeChars) {
result = result.replace(escapeChar, getReplacementToken(escapeChar)); result = result.replace(escapeChar, getReplacementToken(escapeChar));
} }
@ -364,6 +371,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/** /**
* Provides replacement token. * Provides replacement token.
*
* @param escapeChar * @param escapeChar
* @return replacement token * @return replacement token
*/ */
@ -380,21 +388,22 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
} }
/** /**
* Returns list from csv. * Returns list from csv.
* @param csv *
* @return list of values generated from CSV * @param csv
*/ * @return list of values generated from CSV
protected static final List<String> getListFromCSV(String csv) { */
if (StringUtils.isBlank(csv)) { protected static final List<String> getListFromCSV(String csv) {
return new ArrayList<String>(); if (StringUtils.isBlank(csv)) {
} return new ArrayList<String>();
List<String> list = new ArrayList<String>(); }
String[] tokens = StringUtils.split(csv, ","); List<String> list = new ArrayList<String>();
for (String token : tokens) { String[] tokens = StringUtils.split(csv, ",");
list.add(token.trim()); for (String token : tokens) {
} list.add(token.trim());
return list; }
} return list;
}
public void setIncludeSystemProperties(boolean includeSystemProperties) { public void setIncludeSystemProperties(boolean includeSystemProperties) {
this.includeSystemProperties = includeSystemProperties; this.includeSystemProperties = includeSystemProperties;
@ -420,17 +429,16 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
this.quiet = quiet; this.quiet = quiet;
} }
/** /**
* Sets property files for which keys properties should be included. * Sets property files for which keys properties should be included.
* @param includePropertyKeysFromFiles *
*/ * @param includePropertyKeysFromFiles
public void setIncludePropertyKeysFromFiles( */
String[] includePropertyKeysFromFiles) { public void setIncludePropertyKeysFromFiles(String[] includePropertyKeysFromFiles) {
if (includePropertyKeysFromFiles!=null) { if (includePropertyKeysFromFiles != null) {
this.includePropertyKeysFromFiles = Arrays.copyOf( this.includePropertyKeysFromFiles =
includePropertyKeysFromFiles, Arrays.copyOf(
includePropertyKeysFromFiles.length); includePropertyKeysFromFiles, includePropertyKeysFromFiles.length);
} }
} }
} }

View File

@ -1,16 +1,13 @@
package eu.dnetlib.maven.plugin.properties; package eu.dnetlib.maven.plugin.properties;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME; import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR; import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
/** import org.junit.jupiter.api.BeforeEach;
* @author mhorst, claudio.atzori import org.junit.jupiter.api.Test;
*
*/ /** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest { public class GenerateOoziePropertiesMojoTest {
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@ -27,7 +24,7 @@ public class GenerateOoziePropertiesMojoTest {
mojo.execute(); mojo.execute();
// assert // assert
assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
} }
@Test @Test
@ -96,5 +93,4 @@ public class GenerateOoziePropertiesMojoTest {
// assert // assert
assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME)); assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
} }
} }

View File

@ -1,5 +1,12 @@
package eu.dnetlib.maven.plugin.properties; package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.lenient;
import java.io.*;
import java.util.Properties;
import org.apache.maven.plugin.MojoExecutionException; import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject; import org.apache.maven.project.MavenProject;
import org.junit.jupiter.api.*; import org.junit.jupiter.api.*;
@ -9,23 +16,11 @@ import org.mockito.Mock;
import org.mockito.MockitoAnnotations; import org.mockito.MockitoAnnotations;
import org.mockito.junit.jupiter.MockitoExtension; import org.mockito.junit.jupiter.MockitoExtension;
import java.io.*; /** @author mhorst, claudio.atzori */
import java.util.Properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.lenient;
/**
* @author mhorst, claudio.atzori
*
*/
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest { public class WritePredefinedProjectPropertiesTest {
@Mock @Mock private MavenProject mavenProject;
private MavenProject mavenProject;
private WritePredefinedProjectProperties mojo; private WritePredefinedProjectProperties mojo;
@ -86,7 +81,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder)
throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -110,7 +106,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder)
throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -134,7 +131,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder)
throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -150,7 +148,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue"); includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.store(new FileWriter(includedPropertiesFile), null); includedProperties.store(new FileWriter(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute // execute
mojo.execute(); mojo.execute();
@ -164,7 +163,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception { public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -175,7 +175,8 @@ public class WritePredefinedProjectPropertiesTest {
projectProperties.setProperty(includedKey, includedValue); projectProperties.setProperty(includedKey, includedValue);
doReturn(projectProperties).when(mavenProject).getProperties(); doReturn(projectProperties).when(mavenProject).getProperties();
mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"}); mojo.setIncludePropertyKeysFromFiles(
new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
// execute // execute
mojo.execute(); mojo.execute();
@ -207,7 +208,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception { public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -223,7 +225,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue"); includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null); includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute // execute
mojo.execute(); mojo.execute();
@ -237,7 +240,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception { public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -253,7 +257,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue"); includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.store(new FileOutputStream(includedPropertiesFile), null); includedProperties.store(new FileOutputStream(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()}); mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute // execute
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute()); Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
@ -296,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
Properties storedProperties = getStoredProperties(testFolder); Properties storedProperties = getStoredProperties(testFolder);
assertTrue(storedProperties.size() > 0); assertTrue(storedProperties.size() > 0);
for (Object currentKey : storedProperties.keySet()) { for (Object currentKey : storedProperties.keySet()) {
assertTrue(((String)currentKey).startsWith(PROPERTY_PREFIX_ENV)); assertTrue(((String) currentKey).startsWith(PROPERTY_PREFIX_ENV));
} }
} }
@ -320,7 +325,8 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception { public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
throws Exception {
// given // given
String key = "systemPropertyKey "; String key = "systemPropertyKey ";
String value = "systemPropertyValue"; String value = "systemPropertyValue";
@ -347,7 +353,8 @@ public class WritePredefinedProjectPropertiesTest {
return new File(testFolder, "test.properties"); return new File(testFolder, "test.properties");
} }
private Properties getStoredProperties(File testFolder) throws FileNotFoundException, IOException { private Properties getStoredProperties(File testFolder)
throws FileNotFoundException, IOException {
Properties properties = new Properties(); Properties properties = new Properties();
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder))); properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
return properties; return properties;

View File

@ -3,47 +3,45 @@ package eu.dnetlib.collector.worker.model;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
public class ApiDescriptor { public class ApiDescriptor {
private String id; private String id;
private String baseUrl; private String baseUrl;
private String protocol; private String protocol;
private Map<String, String> params = new HashMap<>(); private Map<String, String> params = new HashMap<>();
public String getBaseUrl() { public String getBaseUrl() {
return baseUrl; return baseUrl;
} }
public void setBaseUrl(final String baseUrl) { public void setBaseUrl(final String baseUrl) {
this.baseUrl = baseUrl; this.baseUrl = baseUrl;
} }
public String getId() { public String getId() {
return id; return id;
} }
public void setId(final String id) { public void setId(final String id) {
this.id = id; this.id = id;
} }
public Map<String, String> getParams() { public Map<String, String> getParams() {
return params; return params;
} }
public void setParams(final HashMap<String, String> params) { public void setParams(final HashMap<String, String> params) {
this.params = params; this.params = params;
} }
public String getProtocol() { public String getProtocol() {
return protocol; return protocol;
} }
public void setProtocol(final String protocol) {
this.protocol = protocol;
}
public void setProtocol(final String protocol) {
this.protocol = protocol;
}
} }

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable; import java.io.Serializable;
import java.util.UUID; import java.util.UUID;
import javax.persistence.Column; import javax.persistence.Column;
import javax.persistence.Entity; import javax.persistence.Entity;
import javax.persistence.Id; import javax.persistence.Id;
@ -12,108 +11,107 @@ import javax.persistence.Table;
@Table(name = "mdstores") @Table(name = "mdstores")
public class MDStore implements Serializable { public class MDStore implements Serializable {
/** /** */
* private static final long serialVersionUID = 3160530489149700055L;
*/
private static final long serialVersionUID = 3160530489149700055L;
@Id @Id
@Column(name = "id") @Column(name = "id")
private String id; private String id;
@Column(name = "format") @Column(name = "format")
private String format; private String format;
@Column(name = "layout") @Column(name = "layout")
private String layout; private String layout;
@Column(name = "interpretation") @Column(name = "interpretation")
private String interpretation; private String interpretation;
@Column(name = "datasource_name") @Column(name = "datasource_name")
private String datasourceName; private String datasourceName;
@Column(name = "datasource_id") @Column(name = "datasource_id")
private String datasourceId; private String datasourceId;
@Column(name = "api_id") @Column(name = "api_id")
private String apiId; private String apiId;
public String getId() { public String getId() {
return id; return id;
} }
public void setId(final String id) { public void setId(final String id) {
this.id = id; this.id = id;
} }
public String getFormat() { public String getFormat() {
return format; return format;
} }
public void setFormat(final String format) { public void setFormat(final String format) {
this.format = format; this.format = format;
} }
public String getLayout() { public String getLayout() {
return layout; return layout;
} }
public void setLayout(final String layout) { public void setLayout(final String layout) {
this.layout = layout; this.layout = layout;
} }
public String getInterpretation() { public String getInterpretation() {
return interpretation; return interpretation;
} }
public void setInterpretation(final String interpretation) { public void setInterpretation(final String interpretation) {
this.interpretation = interpretation; this.interpretation = interpretation;
} }
public String getDatasourceName() { public String getDatasourceName() {
return datasourceName; return datasourceName;
} }
public void setDatasourceName(final String datasourceName) { public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName; this.datasourceName = datasourceName;
} }
public String getDatasourceId() { public String getDatasourceId() {
return datasourceId; return datasourceId;
} }
public void setDatasourceId(final String datasourceId) { public void setDatasourceId(final String datasourceId) {
this.datasourceId = datasourceId; this.datasourceId = datasourceId;
} }
public String getApiId() { public String getApiId() {
return apiId; return apiId;
} }
public void setApiId(final String apiId) { public void setApiId(final String apiId) {
this.apiId = apiId; this.apiId = apiId;
} }
public static MDStore newInstance(final String format, final String layout, final String interpretation) { public static MDStore newInstance(
return newInstance(format, layout, interpretation, null, null, null); final String format, final String layout, final String interpretation) {
} return newInstance(format, layout, interpretation, null, null, null);
}
public static MDStore newInstance(final String format,
final String layout,
final String interpretation,
final String dsName,
final String dsId,
final String apiId) {
final MDStore md = new MDStore();
md.setId("md-" + UUID.randomUUID());
md.setFormat(format);
md.setLayout(layout);
md.setInterpretation(interpretation);
md.setDatasourceName(dsName);
md.setDatasourceId(dsId);
md.setApiId(apiId);
return md;
}
public static MDStore newInstance(
final String format,
final String layout,
final String interpretation,
final String dsName,
final String dsId,
final String apiId) {
final MDStore md = new MDStore();
md.setId("md-" + UUID.randomUUID());
md.setFormat(format);
md.setLayout(layout);
md.setInterpretation(interpretation);
md.setDatasourceName(dsName);
md.setDatasourceId(dsId);
md.setApiId(apiId);
return md;
}
} }

View File

@ -1,7 +1,6 @@
package eu.dnetlib.data.mdstore.manager.common.model; package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable; import java.io.Serializable;
import javax.persistence.Column; import javax.persistence.Column;
import javax.persistence.Entity; import javax.persistence.Entity;
import javax.persistence.Id; import javax.persistence.Id;
@ -11,42 +10,40 @@ import javax.persistence.Table;
@Table(name = "mdstore_current_versions") @Table(name = "mdstore_current_versions")
public class MDStoreCurrentVersion implements Serializable { public class MDStoreCurrentVersion implements Serializable {
/** /** */
* private static final long serialVersionUID = -4757725888593745773L;
*/
private static final long serialVersionUID = -4757725888593745773L;
@Id @Id
@Column(name = "mdstore") @Column(name = "mdstore")
private String mdstore; private String mdstore;
@Column(name = "current_version") @Column(name = "current_version")
private String currentVersion; private String currentVersion;
public String getMdstore() { public String getMdstore() {
return mdstore; return mdstore;
} }
public void setMdstore(final String mdstore) { public void setMdstore(final String mdstore) {
this.mdstore = mdstore; this.mdstore = mdstore;
} }
public String getCurrentVersion() { public String getCurrentVersion() {
return currentVersion; return currentVersion;
} }
public void setCurrentVersion(final String currentVersion) { public void setCurrentVersion(final String currentVersion) {
this.currentVersion = currentVersion; this.currentVersion = currentVersion;
} }
public static MDStoreCurrentVersion newInstance(final String mdId, final String versionId) { public static MDStoreCurrentVersion newInstance(final String mdId, final String versionId) {
final MDStoreCurrentVersion cv = new MDStoreCurrentVersion(); final MDStoreCurrentVersion cv = new MDStoreCurrentVersion();
cv.setMdstore(mdId); cv.setMdstore(mdId);
cv.setCurrentVersion(versionId); cv.setCurrentVersion(versionId);
return cv; return cv;
} }
public static MDStoreCurrentVersion newInstance(final MDStoreVersion v) { public static MDStoreCurrentVersion newInstance(final MDStoreVersion v) {
return newInstance(v.getMdstore(), v.getId()); return newInstance(v.getMdstore(), v.getId());
} }
} }

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable; import java.io.Serializable;
import java.util.Date; import java.util.Date;
import javax.persistence.Column; import javax.persistence.Column;
import javax.persistence.Entity; import javax.persistence.Entity;
import javax.persistence.Id; import javax.persistence.Id;
@ -14,88 +13,85 @@ import javax.persistence.TemporalType;
@Table(name = "mdstore_versions") @Table(name = "mdstore_versions")
public class MDStoreVersion implements Serializable { public class MDStoreVersion implements Serializable {
/** /** */
* private static final long serialVersionUID = -4763494442274298339L;
*/
private static final long serialVersionUID = -4763494442274298339L;
@Id @Id
@Column(name = "id") @Column(name = "id")
private String id; private String id;
@Column(name = "mdstore") @Column(name = "mdstore")
private String mdstore; private String mdstore;
@Column(name = "writing") @Column(name = "writing")
private boolean writing; private boolean writing;
@Column(name = "readcount") @Column(name = "readcount")
private int readCount = 0; private int readCount = 0;
@Column(name = "lastupdate") @Column(name = "lastupdate")
@Temporal(TemporalType.TIMESTAMP) @Temporal(TemporalType.TIMESTAMP)
private Date lastUpdate; private Date lastUpdate;
@Column(name = "size") @Column(name = "size")
private long size = 0; private long size = 0;
public static MDStoreVersion newInstance(final String mdId, final boolean writing) { public static MDStoreVersion newInstance(final String mdId, final boolean writing) {
final MDStoreVersion t = new MDStoreVersion(); final MDStoreVersion t = new MDStoreVersion();
t.setId(mdId + "-" + new Date().getTime()); t.setId(mdId + "-" + new Date().getTime());
t.setMdstore(mdId); t.setMdstore(mdId);
t.setLastUpdate(null); t.setLastUpdate(null);
t.setWriting(writing); t.setWriting(writing);
t.setReadCount(0); t.setReadCount(0);
t.setSize(0); t.setSize(0);
return t; return t;
} }
public String getId() { public String getId() {
return id; return id;
} }
public void setId(final String id) { public void setId(final String id) {
this.id = id; this.id = id;
} }
public String getMdstore() { public String getMdstore() {
return mdstore; return mdstore;
} }
public void setMdstore(final String mdstore) { public void setMdstore(final String mdstore) {
this.mdstore = mdstore; this.mdstore = mdstore;
} }
public boolean isWriting() { public boolean isWriting() {
return writing; return writing;
} }
public void setWriting(final boolean writing) { public void setWriting(final boolean writing) {
this.writing = writing; this.writing = writing;
} }
public int getReadCount() { public int getReadCount() {
return readCount; return readCount;
} }
public void setReadCount(final int readCount) { public void setReadCount(final int readCount) {
this.readCount = readCount; this.readCount = readCount;
} }
public Date getLastUpdate() { public Date getLastUpdate() {
return lastUpdate; return lastUpdate;
} }
public void setLastUpdate(final Date lastUpdate) { public void setLastUpdate(final Date lastUpdate) {
this.lastUpdate = lastUpdate; this.lastUpdate = lastUpdate;
} }
public long getSize() { public long getSize() {
return size; return size;
} }
public void setSize(final long size) {
this.size = size;
}
public void setSize(final long size) {
this.size = size;
}
} }

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable; import java.io.Serializable;
import java.util.Date; import java.util.Date;
import javax.persistence.Column; import javax.persistence.Column;
import javax.persistence.Entity; import javax.persistence.Entity;
import javax.persistence.Id; import javax.persistence.Id;
@ -14,132 +13,129 @@ import javax.persistence.TemporalType;
@Table(name = "mdstores_with_info") @Table(name = "mdstores_with_info")
public class MDStoreWithInfo implements Serializable { public class MDStoreWithInfo implements Serializable {
/** /** */
* private static final long serialVersionUID = -8445784770687571492L;
*/
private static final long serialVersionUID = -8445784770687571492L;
@Id @Id
@Column(name = "id") @Column(name = "id")
private String id; private String id;
@Column(name = "format") @Column(name = "format")
private String format; private String format;
@Column(name = "layout") @Column(name = "layout")
private String layout; private String layout;
@Column(name = "interpretation") @Column(name = "interpretation")
private String interpretation; private String interpretation;
@Column(name = "datasource_name") @Column(name = "datasource_name")
private String datasourceName; private String datasourceName;
@Column(name = "datasource_id") @Column(name = "datasource_id")
private String datasourceId; private String datasourceId;
@Column(name = "api_id") @Column(name = "api_id")
private String apiId; private String apiId;
@Column(name = "current_version") @Column(name = "current_version")
private String currentVersion; private String currentVersion;
@Column(name = "lastupdate") @Column(name = "lastupdate")
@Temporal(TemporalType.TIMESTAMP) @Temporal(TemporalType.TIMESTAMP)
private Date lastUpdate; private Date lastUpdate;
@Column(name = "size") @Column(name = "size")
private long size = 0; private long size = 0;
@Column(name = "n_versions") @Column(name = "n_versions")
private long numberOfVersions = 0; private long numberOfVersions = 0;
public String getId() { public String getId() {
return id; return id;
} }
public void setId(final String id) { public void setId(final String id) {
this.id = id; this.id = id;
} }
public String getFormat() { public String getFormat() {
return format; return format;
} }
public void setFormat(final String format) { public void setFormat(final String format) {
this.format = format; this.format = format;
} }
public String getLayout() { public String getLayout() {
return layout; return layout;
} }
public void setLayout(final String layout) { public void setLayout(final String layout) {
this.layout = layout; this.layout = layout;
} }
public String getInterpretation() { public String getInterpretation() {
return interpretation; return interpretation;
} }
public void setInterpretation(final String interpretation) { public void setInterpretation(final String interpretation) {
this.interpretation = interpretation; this.interpretation = interpretation;
} }
public String getDatasourceName() { public String getDatasourceName() {
return datasourceName; return datasourceName;
} }
public void setDatasourceName(final String datasourceName) { public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName; this.datasourceName = datasourceName;
} }
public String getDatasourceId() { public String getDatasourceId() {
return datasourceId; return datasourceId;
} }
public void setDatasourceId(final String datasourceId) { public void setDatasourceId(final String datasourceId) {
this.datasourceId = datasourceId; this.datasourceId = datasourceId;
} }
public String getApiId() { public String getApiId() {
return apiId; return apiId;
} }
public void setApiId(final String apiId) { public void setApiId(final String apiId) {
this.apiId = apiId; this.apiId = apiId;
} }
public String getCurrentVersion() { public String getCurrentVersion() {
return currentVersion; return currentVersion;
} }
public void setCurrentVersion(final String currentVersion) { public void setCurrentVersion(final String currentVersion) {
this.currentVersion = currentVersion; this.currentVersion = currentVersion;
} }
public Date getLastUpdate() { public Date getLastUpdate() {
return lastUpdate; return lastUpdate;
} }
public void setLastUpdate(final Date lastUpdate) { public void setLastUpdate(final Date lastUpdate) {
this.lastUpdate = lastUpdate; this.lastUpdate = lastUpdate;
} }
public long getSize() { public long getSize() {
return size; return size;
} }
public void setSize(final long size) { public void setSize(final long size) {
this.size = size; this.size = size;
} }
public long getNumberOfVersions() { public long getNumberOfVersions() {
return numberOfVersions; return numberOfVersions;
} }
public void setNumberOfVersions(final long numberOfVersions) {
this.numberOfVersions = numberOfVersions;
}
public void setNumberOfVersions(final long numberOfVersions) {
this.numberOfVersions = numberOfVersions;
}
} }

View File

@ -1,10 +1,6 @@
package eu.dnetlib.dhp.application; package eu.dnetlib.dhp.application;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.Serializable; import java.io.Serializable;
@ -12,7 +8,9 @@ import java.io.StringWriter;
import java.util.*; import java.util.*;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
import java.util.zip.Inflater; import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
public class ArgumentApplicationParser implements Serializable { public class ArgumentApplicationParser implements Serializable {
@ -23,7 +21,8 @@ public class ArgumentApplicationParser implements Serializable {
public ArgumentApplicationParser(final String json_configuration) throws Exception { public ArgumentApplicationParser(final String json_configuration) throws Exception {
final ObjectMapper mapper = new ObjectMapper(); final ObjectMapper mapper = new ObjectMapper();
final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class); final OptionsParameter[] configuration =
mapper.readValue(json_configuration, OptionsParameter[].class);
createOptionMap(configuration); createOptionMap(configuration);
} }
@ -33,23 +32,26 @@ public class ArgumentApplicationParser implements Serializable {
private void createOptionMap(final OptionsParameter[] configuration) { private void createOptionMap(final OptionsParameter[] configuration) {
Arrays.stream(configuration).map(conf -> { Arrays.stream(configuration)
final Option o = new Option(conf.getParamName(), true, conf.getParamDescription()); .map(
o.setLongOpt(conf.getParamLongName()); conf -> {
o.setRequired(conf.isParamRequired()); final Option o =
if (conf.isCompressed()) { new Option(
compressedValues.add(conf.getParamLongName()); conf.getParamName(), true, conf.getParamDescription());
} o.setLongOpt(conf.getParamLongName());
return o; o.setRequired(conf.isParamRequired());
}).forEach(options::addOption); if (conf.isCompressed()) {
compressedValues.add(conf.getParamLongName());
// HelpFormatter formatter = new HelpFormatter(); }
// formatter.printHelp("myapp", null, options, null, true); return o;
})
.forEach(options::addOption);
// HelpFormatter formatter = new HelpFormatter();
// formatter.printHelp("myapp", null, options, null, true);
} }
public static String decompressValue(final String abstractCompressed) { public static String decompressValue(final String abstractCompressed) {
try { try {
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes()); byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
@ -63,7 +65,7 @@ public class ArgumentApplicationParser implements Serializable {
} }
} }
public static String compressArgument(final String value) throws Exception{ public static String compressArgument(final String value) throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out); GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(value.getBytes()); gzip.write(value.getBytes());
@ -74,7 +76,14 @@ public class ArgumentApplicationParser implements Serializable {
public void parseArgument(final String[] args) throws Exception { public void parseArgument(final String[] args) throws Exception {
CommandLineParser parser = new BasicParser(); CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse(options, args); CommandLine cmd = parser.parse(options, args);
Arrays.stream(cmd.getOptions()).forEach(it -> objectMap.put(it.getLongOpt(), compressedValues.contains(it.getLongOpt())? decompressValue(it.getValue()): it.getValue())); Arrays.stream(cmd.getOptions())
.forEach(
it ->
objectMap.put(
it.getLongOpt(),
compressedValues.contains(it.getLongOpt())
? decompressValue(it.getValue())
: it.getValue()));
} }
public String get(final String key) { public String get(final String key) {

View File

@ -1,6 +1,5 @@
package eu.dnetlib.dhp.application; package eu.dnetlib.dhp.application;
public class OptionsParameter { public class OptionsParameter {
private String paramName; private String paramName;
@ -9,8 +8,7 @@ public class OptionsParameter {
private boolean paramRequired; private boolean paramRequired;
private boolean compressed; private boolean compressed;
public OptionsParameter() { public OptionsParameter() {}
}
public String getParamName() { public String getParamName() {
return paramName; return paramName;

View File

@ -3,23 +3,19 @@ package eu.dnetlib.dhp.common;
import java.io.Serializable; import java.io.Serializable;
import java.util.function.Supplier; import java.util.function.Supplier;
/** /** Provides serializable and throwing extensions to standard functional interfaces. */
* Provides serializable and throwing extensions to standard functional interfaces.
*/
public class FunctionalInterfaceSupport { public class FunctionalInterfaceSupport {
private FunctionalInterfaceSupport() { private FunctionalInterfaceSupport() {}
}
/** /**
* Serializable supplier of any kind of objects. To be used within spark processing pipelines when supplying * Serializable supplier of any kind of objects. To be used within spark processing pipelines
* functions externally. * when supplying functions externally.
* *
* @param <T> * @param <T>
*/ */
@FunctionalInterface @FunctionalInterface
public interface SerializableSupplier<T> extends Supplier<T>, Serializable { public interface SerializableSupplier<T> extends Supplier<T>, Serializable {}
}
/** /**
* Extension of consumer accepting functions throwing an exception. * Extension of consumer accepting functions throwing an exception.
@ -52,5 +48,4 @@ public class FunctionalInterfaceSupport {
public interface ThrowingRunnable<E extends Exception> { public interface ThrowingRunnable<E extends Exception> {
void run() throws E; void run() throws E;
} }
} }

View File

@ -1,5 +1,10 @@
package eu.dnetlib.dhp.common; package eu.dnetlib.dhp.common;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -7,51 +12,60 @@ import org.apache.hadoop.fs.Path;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.Arrays; /** HDFS utility methods. */
import java.util.List;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
/**
* HDFS utility methods.
*/
public class HdfsSupport { public class HdfsSupport {
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class); private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
private HdfsSupport() { private HdfsSupport() {}
/**
* Checks a path (file or dir) exists on HDFS.
*
* @param path Path to be checked
* @param configuration Configuration of hadoop env
*/
public static boolean exists(String path, Configuration configuration) {
logger.info("Removing path: {}", path);
return rethrowAsRuntimeException(
() -> {
Path f = new Path(path);
FileSystem fileSystem = FileSystem.get(configuration);
return fileSystem.exists(f);
});
} }
/** /**
* Removes a path (file or dir) from HDFS. * Removes a path (file or dir) from HDFS.
* *
* @param path Path to be removed * @param path Path to be removed
* @param configuration Configuration of hadoop env * @param configuration Configuration of hadoop env
*/ */
public static void remove(String path, Configuration configuration) { public static void remove(String path, Configuration configuration) {
logger.info("Removing path: {}", path); logger.info("Removing path: {}", path);
rethrowAsRuntimeException(() -> { rethrowAsRuntimeException(
Path f = new Path(path); () -> {
FileSystem fileSystem = FileSystem.get(configuration); Path f = new Path(path);
if (fileSystem.exists(f)) { FileSystem fileSystem = FileSystem.get(configuration);
fileSystem.delete(f, true); if (fileSystem.exists(f)) {
} fileSystem.delete(f, true);
}); }
});
} }
/** /**
* Lists hadoop files located below path or alternatively lists subdirs under path. * Lists hadoop files located below path or alternatively lists subdirs under path.
* *
* @param path Path to be listed for hadoop files * @param path Path to be listed for hadoop files
* @param configuration Configuration of hadoop env * @param configuration Configuration of hadoop env
* @return List with string locations of hadoop files * @return List with string locations of hadoop files
*/ */
public static List<String> listFiles(String path, Configuration configuration) { public static List<String> listFiles(String path, Configuration configuration) {
logger.info("Listing files in path: {}", path); logger.info("Listing files in path: {}", path);
return rethrowAsRuntimeException(() -> Arrays return rethrowAsRuntimeException(
.stream(FileSystem.get(configuration).listStatus(new Path(path))) () ->
.filter(FileStatus::isDirectory) Arrays.stream(FileSystem.get(configuration).listStatus(new Path(path)))
.map(x -> x.getPath().toString()) .filter(FileStatus::isDirectory)
.collect(Collectors.toList())); .map(x -> x.getPath().toString())
.collect(Collectors.toList()));
} }
} }

View File

@ -1,61 +1,71 @@
package eu.dnetlib.dhp.common; package eu.dnetlib.dhp.common;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
import java.util.Objects;
import java.util.function.Function;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import java.util.Objects; /** SparkSession utility methods. */
import java.util.function.Function;
/**
* SparkSession utility methods.
*/
public class SparkSessionSupport { public class SparkSessionSupport {
private SparkSessionSupport() { private SparkSessionSupport() {}
/**
* Runs a given function using SparkSession created using default builder and supplied
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
* created externally.
*
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkSession(
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
runWithSparkSession(
c -> SparkSession.builder().config(c).getOrCreate(),
conf,
isSparkSessionManaged,
fn);
} }
/** /**
* Runs a given function using SparkSession created using default builder and supplied SparkConf. Stops SparkSession * Runs a given function using SparkSession created with hive support and using default builder
* when SparkSession is managed. Allows to reuse SparkSession created externally. * and supplied SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse
* SparkSession created externally.
* *
* @param conf SparkConf instance * @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession * @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession * @param fn Consumer to be applied to constructed SparkSession
*/ */
public static void runWithSparkSession(SparkConf conf, public static void runWithSparkHiveSession(
Boolean isSparkSessionManaged, SparkConf conf,
ThrowingConsumer<SparkSession, Exception> fn) { Boolean isSparkSessionManaged,
runWithSparkSession(c -> SparkSession.builder().config(c).getOrCreate(), conf, isSparkSessionManaged, fn); ThrowingConsumer<SparkSession, Exception> fn) {
runWithSparkSession(
c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(),
conf,
isSparkSessionManaged,
fn);
} }
/** /**
* Runs a given function using SparkSession created with hive support and using default builder and supplied SparkConf. * Runs a given function using SparkSession created using supplied builder and supplied
* Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession created externally. * SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
* created externally.
* *
* @param conf SparkConf instance * @param sparkSessionBuilder Builder of SparkSession
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession * @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession * @param fn Consumer to be applied to constructed SparkSession
*/ */
public static void runWithSparkHiveSession(SparkConf conf, public static void runWithSparkSession(
Boolean isSparkSessionManaged, Function<SparkConf, SparkSession> sparkSessionBuilder,
ThrowingConsumer<SparkSession, Exception> fn) { SparkConf conf,
runWithSparkSession(c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(), conf, isSparkSessionManaged, fn); Boolean isSparkSessionManaged,
} ThrowingConsumer<SparkSession, Exception> fn) {
/**
* Runs a given function using SparkSession created using supplied builder and supplied SparkConf. Stops SparkSession
* when SparkSession is managed. Allows to reuse SparkSession created externally.
*
* @param sparkSessionBuilder Builder of SparkSession
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkSession(Function<SparkConf, SparkSession> sparkSessionBuilder,
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
SparkSession spark = null; SparkSession spark = null;
try { try {
spark = sparkSessionBuilder.apply(conf); spark = sparkSessionBuilder.apply(conf);

View File

@ -3,18 +3,15 @@ package eu.dnetlib.dhp.common;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingRunnable; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingRunnable;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingSupplier;
/** /** Exception handling utility methods. */
* Exception handling utility methods.
*/
public class ThrowingSupport { public class ThrowingSupport {
private ThrowingSupport() { private ThrowingSupport() {}
}
/** /**
* Executes given runnable and rethrows any exceptions as RuntimeException. * Executes given runnable and rethrows any exceptions as RuntimeException.
* *
* @param fn Runnable to be executed * @param fn Runnable to be executed
* @param <E> Type of exception thrown * @param <E> Type of exception thrown
*/ */
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn) { public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn) {
@ -28,11 +25,12 @@ public class ThrowingSupport {
/** /**
* Executes given runnable and rethrows any exceptions as RuntimeException with custom message. * Executes given runnable and rethrows any exceptions as RuntimeException with custom message.
* *
* @param fn Runnable to be executed * @param fn Runnable to be executed
* @param msg Message to be set for rethrown exception * @param msg Message to be set for rethrown exception
* @param <E> Type of exception thrown * @param <E> Type of exception thrown
*/ */
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn, String msg) { public static <E extends Exception> void rethrowAsRuntimeException(
ThrowingRunnable<E> fn, String msg) {
try { try {
fn.run(); fn.run();
} catch (Exception e) { } catch (Exception e) {
@ -43,7 +41,7 @@ public class ThrowingSupport {
/** /**
* Executes given supplier and rethrows any exceptions as RuntimeException. * Executes given supplier and rethrows any exceptions as RuntimeException.
* *
* @param fn Supplier to be executed * @param fn Supplier to be executed
* @param <T> Type of returned value * @param <T> Type of returned value
* @param <E> Type of exception thrown * @param <E> Type of exception thrown
* @return Result of supplier execution * @return Result of supplier execution
@ -59,18 +57,18 @@ public class ThrowingSupport {
/** /**
* Executes given supplier and rethrows any exceptions as RuntimeException with custom message. * Executes given supplier and rethrows any exceptions as RuntimeException with custom message.
* *
* @param fn Supplier to be executed * @param fn Supplier to be executed
* @param msg Message to be set for rethrown exception * @param msg Message to be set for rethrown exception
* @param <T> Type of returned value * @param <T> Type of returned value
* @param <E> Type of exception thrown * @param <E> Type of exception thrown
* @return Result of supplier execution * @return Result of supplier execution
*/ */
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn, String msg) { public static <T, E extends Exception> T rethrowAsRuntimeException(
ThrowingSupplier<T, E> fn, String msg) {
try { try {
return fn.get(); return fn.get();
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(msg, e); throw new RuntimeException(msg, e);
} }
} }
} }

View File

@ -1,66 +1,52 @@
package eu.dnetlib.dhp.model.mdstore; package eu.dnetlib.dhp.model.mdstore;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.Serializable; import java.io.Serializable;
/** This class models a record inside the new Metadata store collection on HDFS * */
/**
* This class models a record inside the new Metadata store collection on HDFS *
*
*/
public class MetadataRecord implements Serializable { public class MetadataRecord implements Serializable {
/** /** The D-Net Identifier associated to the record */
* The D-Net Identifier associated to the record
*/
private String id; private String id;
/** /** The original Identifier of the record */
* The original Identifier of the record
*/
private String originalId; private String originalId;
/** The encoding of the record, should be JSON or XML */
/**
* The encoding of the record, should be JSON or XML
*/
private String encoding; private String encoding;
/** /**
* The information about the provenance of the record see @{@link Provenance} * The information about the provenance of the record see @{@link Provenance} for the model of
* for the model of this information * this information
*/ */
private Provenance provenance; private Provenance provenance;
/** /** The content of the metadata */
* The content of the metadata
*/
private String body; private String body;
/** /** the date when the record has been stored */
* the date when the record has been stored
*/
private long dateOfCollection; private long dateOfCollection;
/** /** the date when the record has been stored */
* the date when the record has been stored
*/
private long dateOfTransformation; private long dateOfTransformation;
public MetadataRecord() { public MetadataRecord() {
this.dateOfCollection = System.currentTimeMillis(); this.dateOfCollection = System.currentTimeMillis();
} }
public MetadataRecord(String originalId, String encoding, Provenance provenance, String body, long dateOfCollection) { public MetadataRecord(
String originalId,
String encoding,
Provenance provenance,
String body,
long dateOfCollection) {
this.originalId = originalId; this.originalId = originalId;
this.encoding = encoding; this.encoding = encoding;
this.provenance = provenance; this.provenance = provenance;
this.body = body; this.body = body;
this.dateOfCollection = dateOfCollection; this.dateOfCollection = dateOfCollection;
this.id = DHPUtils.generateIdentifier(originalId,this.provenance.getNsPrefix()); this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
} }
public String getId() { public String getId() {
@ -71,7 +57,6 @@ public class MetadataRecord implements Serializable {
this.id = id; this.id = id;
} }
public String getOriginalId() { public String getOriginalId() {
return originalId; return originalId;
} }
@ -96,7 +81,6 @@ public class MetadataRecord implements Serializable {
this.provenance = provenance; this.provenance = provenance;
} }
public String getBody() { public String getBody() {
return body; return body;
} }
@ -127,7 +111,6 @@ public class MetadataRecord implements Serializable {
return false; return false;
} }
return ((MetadataRecord) o).getId().equalsIgnoreCase(id); return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
} }
@Override @Override

View File

@ -2,27 +2,20 @@ package eu.dnetlib.dhp.model.mdstore;
import java.io.Serializable; import java.io.Serializable;
/** /**
* @author Sandro La Bruzzo * @author Sandro La Bruzzo
* * <p>Provenace class models the provenance of the record in the metadataStore It contains the
* Provenace class models the provenance of the record in the metadataStore * identifier and the name of the datasource that gives the record
* It contains the identifier and the name of the datasource that gives the
* record
*
*/ */
public class Provenance implements Serializable { public class Provenance implements Serializable {
private String datasourceId; private String datasourceId;
private String datasourceName; private String datasourceName;
private String nsPrefix; private String nsPrefix;
public Provenance() { public Provenance() {}
}
public Provenance(String datasourceId, String datasourceName, String nsPrefix) { public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
this.datasourceId = datasourceId; this.datasourceId = datasourceId;

View File

@ -1,20 +1,17 @@
package eu.dnetlib.dhp.parser.utility; package eu.dnetlib.dhp.parser.utility;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDNav;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/** Created by sandro on 9/29/16. */
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDNav;
/**
* Created by sandro on 9/29/16.
*/
public class VtdUtilityParser { public class VtdUtilityParser {
public static List<Node> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes) public static List<Node> getTextValuesWithAttributes(
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
throws VtdException { throws VtdException {
final List<Node> results = new ArrayList<>(); final List<Node> results = new ArrayList<>();
try { try {
@ -35,25 +32,28 @@ public class VtdUtilityParser {
} }
} }
private static Map<String, String> getAttributes(final VTDNav vn, final List<String> attributes) { private static Map<String, String> getAttributes(
final VTDNav vn, final List<String> attributes) {
final Map<String, String> currentAttributes = new HashMap<>(); final Map<String, String> currentAttributes = new HashMap<>();
if (attributes != null) { if (attributes != null) {
attributes.forEach(attributeKey -> { attributes.forEach(
try { attributeKey -> {
int attr = vn.getAttrVal(attributeKey); try {
if (attr > -1) { int attr = vn.getAttrVal(attributeKey);
currentAttributes.put(attributeKey, vn.toNormalizedString(attr)); if (attr > -1) {
} currentAttributes.put(attributeKey, vn.toNormalizedString(attr));
} catch (Throwable e) { }
throw new RuntimeException(e); } catch (Throwable e) {
} throw new RuntimeException(e);
}); }
});
} }
return currentAttributes; return currentAttributes;
} }
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws VtdException { public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath)
throws VtdException {
List<String> results = new ArrayList<>(); List<String> results = new ArrayList<>();
try { try {
ap.selectXPath(xpath); ap.selectXPath(xpath);
@ -67,13 +67,13 @@ public class VtdUtilityParser {
} }
} }
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws VtdException { public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath)
throws VtdException {
try { try {
ap.selectXPath(xpath); ap.selectXPath(xpath);
while (ap.evalXPath() != -1) { while (ap.evalXPath() != -1) {
int it = nav.getText(); int it = nav.getText();
if (it > -1) if (it > -1) return nav.toNormalizedString(it);
return nav.toNormalizedString(it);
} }
return null; return null;
} catch (Exception e) { } catch (Exception e) {
@ -103,5 +103,4 @@ public class VtdUtilityParser {
this.attributes = attributes; this.attributes = attributes;
} }
} }
} }

View File

@ -1,18 +1,16 @@
package eu.dnetlib.dhp.utils; package eu.dnetlib.dhp.utils;
import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.JsonPath;
import net.minidev.json.JSONArray;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream; import java.util.zip.GZIPOutputStream;
import net.minidev.json.JSONArray;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
public class DHPUtils { public class DHPUtils {
@ -28,41 +26,40 @@ public class DHPUtils {
} }
public static String generateIdentifier(final String originalId, final String nsPrefix) { public static String generateIdentifier(final String originalId, final String nsPrefix) {
return String.format("%s::%s",nsPrefix, DHPUtils.md5(originalId)); return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
} }
public static String compressString(final String input ) { public static String compressString(final String input) {
try ( ByteArrayOutputStream out = new ByteArrayOutputStream(); Base64OutputStream b64os = new Base64OutputStream(out)) { try (ByteArrayOutputStream out = new ByteArrayOutputStream();
Base64OutputStream b64os = new Base64OutputStream(out)) {
GZIPOutputStream gzip = new GZIPOutputStream(b64os); GZIPOutputStream gzip = new GZIPOutputStream(b64os);
gzip.write(input.getBytes(StandardCharsets.UTF_8)); gzip.write(input.getBytes(StandardCharsets.UTF_8));
gzip.close(); gzip.close();
return out.toString(); return out.toString();
} catch (Throwable e ) { } catch (Throwable e) {
return null; return null;
} }
} }
public static String decompressString(final String input) { public static String decompressString(final String input) {
byte[] byteArray = Base64.decodeBase64(input.getBytes()); byte[] byteArray = Base64.decodeBase64(input.getBytes());
int len; int len;
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) { try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
byte[] buffer = new byte[1024]; byte[] buffer = new byte[1024];
while((len = gis.read(buffer)) != -1){ while ((len = gis.read(buffer)) != -1) {
bos.write(buffer, 0, len); bos.write(buffer, 0, len);
} }
return bos.toString(); return bos.toString();
} catch (Exception e) { } catch (Exception e) {
return null; return null;
} }
} }
public static String getJPathString(final String jsonPath, final String json) { public static String getJPathString(final String jsonPath, final String json) {
try { try {
Object o = JsonPath.read(json, jsonPath); Object o = JsonPath.read(json, jsonPath);
if (o instanceof String) if (o instanceof String) return (String) o;
return (String) o;
if (o instanceof JSONArray && ((JSONArray) o).size() > 0) if (o instanceof JSONArray && ((JSONArray) o).size() > 0)
return (String) ((JSONArray) o).get(0); return (String) ((JSONArray) o).get(0);
return o.toString(); return o.toString();
@ -70,5 +67,4 @@ public class DHPUtils {
return ""; return "";
} }
} }
} }

View File

@ -9,10 +9,13 @@ import net.sf.saxon.trans.XPathException;
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public static String DEFAULT_SAXON_EXT_NS_URI =
"http://www.d-net.research-infrastructures.eu/saxon-extension";
public abstract String getName(); public abstract String getName();
public abstract Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException;
public abstract Sequence doCall(XPathContext context, Sequence[] arguments)
throws XPathException;
@Override @Override
public StructuredQName getFunctionQName() { public StructuredQName getFunctionQName() {
@ -28,5 +31,4 @@ public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinit
} }
}; };
} }
} }

View File

@ -1,5 +1,9 @@
package eu.dnetlib.dhp.utils.saxon; package eu.dnetlib.dhp.utils.saxon;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
import net.sf.saxon.expr.XPathContext; import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Item; import net.sf.saxon.om.Item;
import net.sf.saxon.om.Sequence; import net.sf.saxon.om.Sequence;
@ -7,14 +11,9 @@ import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.SequenceType; import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue; import net.sf.saxon.value.StringValue;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
public class ExtractYear extends AbstractExtensionFunction { public class ExtractYear extends AbstractExtensionFunction {
private static final String[] dateFormats = { "yyyy-MM-dd", "yyyy/MM/dd" }; private static final String[] dateFormats = {"yyyy-MM-dd", "yyyy/MM/dd"};
@Override @Override
public String getName() { public String getName() {
@ -45,7 +44,7 @@ public class ExtractYear extends AbstractExtensionFunction {
@Override @Override
public SequenceType[] getArgumentTypes() { public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM }; return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
} }
@Override @Override
@ -60,7 +59,8 @@ public class ExtractYear extends AbstractExtensionFunction {
c.setTime(new SimpleDateFormat(format).parse(s)); c.setTime(new SimpleDateFormat(format).parse(s));
String year = String.valueOf(c.get(Calendar.YEAR)); String year = String.valueOf(c.get(Calendar.YEAR));
return year; return year;
} catch (ParseException e) {} } catch (ParseException e) {
}
} }
return ""; return "";
} }

View File

@ -1,18 +1,19 @@
package eu.dnetlib.dhp.utils.saxon; package eu.dnetlib.dhp.utils.saxon;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import net.sf.saxon.expr.XPathContext; import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence; import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.XPathException; import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.SequenceType; import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue; import net.sf.saxon.value.StringValue;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
public class NormalizeDate extends AbstractExtensionFunction { public class NormalizeDate extends AbstractExtensionFunction {
private static final String[] normalizeDateFormats = { "yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy" }; private static final String[] normalizeDateFormats = {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'"); private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
@ -42,7 +43,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override @Override
public SequenceType[] getArgumentTypes() { public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM }; return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
} }
@Override @Override
@ -58,9 +59,9 @@ public class NormalizeDate extends AbstractExtensionFunction {
Date parse = new SimpleDateFormat(format).parse(date); Date parse = new SimpleDateFormat(format).parse(date);
String res = new SimpleDateFormat(normalizeOutFormat).format(parse); String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
return res; return res;
} catch (ParseException e) {} } catch (ParseException e) {
}
} }
return ""; return "";
} }
} }

View File

@ -24,7 +24,8 @@ public class PickFirst extends AbstractExtensionFunction {
final String s1 = getValue(arguments[0]); final String s1 = getValue(arguments[0]);
final String s2 = getValue(arguments[1]); final String s2 = getValue(arguments[1]);
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); return new StringValue(
StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
} }
private String getValue(final Sequence arg) throws XPathException { private String getValue(final Sequence arg) throws XPathException {
@ -49,12 +50,11 @@ public class PickFirst extends AbstractExtensionFunction {
@Override @Override
public SequenceType[] getArgumentTypes() { public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM }; return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
} }
@Override @Override
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) { public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
return SequenceType.SINGLE_STRING; return SequenceType.SINGLE_STRING;
} }
} }

View File

@ -1,17 +1,17 @@
package eu.dnetlib.dhp.utils.saxon; package eu.dnetlib.dhp.utils.saxon;
import net.sf.saxon.Configuration; import java.io.StringReader;
import net.sf.saxon.TransformerFactoryImpl;
import javax.xml.transform.Transformer; import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamSource; import javax.xml.transform.stream.StreamSource;
import java.io.StringReader; import net.sf.saxon.Configuration;
import net.sf.saxon.TransformerFactoryImpl;
public class SaxonTransformerFactory { public class SaxonTransformerFactory {
/** /**
* Creates the index record transformer from the given XSLT * Creates the index record transformer from the given XSLT
*
* @param xslt * @param xslt
* @return * @return
* @throws TransformerException * @throws TransformerException
@ -26,5 +26,4 @@ public class SaxonTransformerFactory {
return factory.newTransformer(new StreamSource(new StringReader(xslt))); return factory.newTransformer(new StreamSource(new StringReader(xslt)));
} }
} }

View File

@ -2,7 +2,6 @@ package eu.dnetlib.message;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
@ -16,20 +15,12 @@ public class Message {
private Map<String, String> body; private Map<String, String> body;
public static Message fromJson(final String json) throws IOException { public static Message fromJson(final String json) throws IOException {
final ObjectMapper jsonMapper = new ObjectMapper(); final ObjectMapper jsonMapper = new ObjectMapper();
return jsonMapper.readValue(json, Message.class); return jsonMapper.readValue(json, Message.class);
} }
public Message() {}
public Message() {
}
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) { public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
this.workflowId = workflowId; this.workflowId = workflowId;

View File

@ -4,7 +4,6 @@ import com.rabbitmq.client.AMQP;
import com.rabbitmq.client.Channel; import com.rabbitmq.client.Channel;
import com.rabbitmq.client.DefaultConsumer; import com.rabbitmq.client.DefaultConsumer;
import com.rabbitmq.client.Envelope; import com.rabbitmq.client.Envelope;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
@ -13,7 +12,6 @@ public class MessageConsumer extends DefaultConsumer {
final LinkedBlockingQueue<Message> queueMessages; final LinkedBlockingQueue<Message> queueMessages;
/** /**
* Constructs a new instance and records its association to the passed-in channel. * Constructs a new instance and records its association to the passed-in channel.
* *
@ -25,19 +23,20 @@ public class MessageConsumer extends DefaultConsumer {
this.queueMessages = queueMessages; this.queueMessages = queueMessages;
} }
@Override @Override
public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body) throws IOException { public void handleDelivery(
String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
throws IOException {
final String json = new String(body, StandardCharsets.UTF_8); final String json = new String(body, StandardCharsets.UTF_8);
Message message = Message.fromJson(json); Message message = Message.fromJson(json);
try { try {
this.queueMessages.put(message); this.queueMessages.put(message);
System.out.println("Receiving Message "+message); System.out.println("Receiving Message " + message);
} catch (InterruptedException e) { } catch (InterruptedException e) {
if (message.getType()== MessageType.REPORT) if (message.getType() == MessageType.REPORT)
throw new RuntimeException("Error on sending message"); throw new RuntimeException("Error on sending message");
else { else {
//TODO LOGGING EXCEPTION // TODO LOGGING EXCEPTION
} }
} finally { } finally {
getChannel().basicAck(envelope.getDeliveryTag(), false); getChannel().basicAck(envelope.getDeliveryTag(), false);

View File

@ -3,8 +3,6 @@ package eu.dnetlib.message;
import com.rabbitmq.client.Channel; import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection; import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory; import com.rabbitmq.client.ConnectionFactory;
import sun.rmi.runtime.Log;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
@ -21,23 +19,32 @@ public class MessageManager {
private Connection connection; private Connection connection;
private Map<String , Channel> channels = new HashMap<>(); private Map<String, Channel> channels = new HashMap<>();
private boolean durable; private boolean durable;
private boolean autodelete; private boolean autodelete;
final private LinkedBlockingQueue<Message> queueMessages; private final LinkedBlockingQueue<Message> queueMessages;
public MessageManager(String messageHost, String username, String password, final LinkedBlockingQueue<Message> queueMessages) { public MessageManager(
String messageHost,
String username,
String password,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages; this.queueMessages = queueMessages;
this.messageHost = messageHost; this.messageHost = messageHost;
this.username = username; this.username = username;
this.password = password; this.password = password;
} }
public MessageManager(
public MessageManager(String messageHost, String username, String password, boolean durable, boolean autodelete, final LinkedBlockingQueue<Message> queueMessages) { String messageHost,
String username,
String password,
boolean durable,
boolean autodelete,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages; this.queueMessages = queueMessages;
this.messageHost = messageHost; this.messageHost = messageHost;
this.username = username; this.username = username;
@ -55,7 +62,12 @@ public class MessageManager {
return factory.newConnection(); return factory.newConnection();
} }
private Channel createChannel(final Connection connection, final String queueName, final boolean durable, final boolean autodelete ) throws Exception { private Channel createChannel(
final Connection connection,
final String queueName,
final boolean durable,
final boolean autodelete)
throws Exception {
Map<String, Object> args = new HashMap<>(); Map<String, Object> args = new HashMap<>();
args.put("x-message-ttl", 10000); args.put("x-message-ttl", 10000);
Channel channel = connection.createChannel(); Channel channel = connection.createChannel();
@ -63,9 +75,10 @@ public class MessageManager {
return channel; return channel;
} }
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete) throws Exception { private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
throws Exception {
if (channels.containsKey(queueName)) { if (channels.containsKey(queueName)) {
return channels.get(queueName); return channels.get(queueName);
} }
if (this.connection == null) { if (this.connection == null) {
@ -75,16 +88,16 @@ public class MessageManager {
return channels.get(queueName); return channels.get(queueName);
} }
public void close() throws IOException { public void close() throws IOException {
channels.values().forEach(ch-> { channels.values()
try { .forEach(
ch.close(); ch -> {
} catch (Exception e) { try {
//TODO LOG ch.close();
} } catch (Exception e) {
}); // TODO LOG
}
});
this.connection.close(); this.connection.close();
} }
@ -92,26 +105,30 @@ public class MessageManager {
public boolean sendMessage(final Message message, String queueName) throws Exception { public boolean sendMessage(final Message message, String queueName) throws Exception {
try { try {
Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete); Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete);
channel.basicPublish("", queueName,null, message.toString().getBytes()); channel.basicPublish("", queueName, null, message.toString().getBytes());
return true; return true;
} catch (Throwable e) { } catch (Throwable e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
public boolean sendMessage(final Message message, String queueName, boolean durable_var, boolean autodelete_var) throws Exception { public boolean sendMessage(
final Message message, String queueName, boolean durable_var, boolean autodelete_var)
throws Exception {
try { try {
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var); Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
channel.basicPublish("", queueName,null, message.toString().getBytes()); channel.basicPublish("", queueName, null, message.toString().getBytes());
return true; return true;
} catch (Throwable e) { } catch (Throwable e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
public void startConsumingMessage(final String queueName, final boolean durable, final boolean autodelete) throws Exception{ public void startConsumingMessage(
final String queueName, final boolean durable, final boolean autodelete)
throws Exception {
Channel channel = createChannel(createConnection(), queueName, durable, autodelete); Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
channel.basicConsume(queueName, false, new MessageConsumer(channel,queueMessages)); channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
} }
} }

View File

@ -1,8 +1,6 @@
package eu.dnetlib.message; package eu.dnetlib.message;
public enum MessageType { public enum MessageType {
ONGOING, ONGOING,
REPORT REPORT
} }

View File

@ -2,7 +2,7 @@ package eu.dnetlib.scholexplorer.relation;
import java.io.Serializable; import java.io.Serializable;
public class RelInfo implements Serializable { public class RelInfo implements Serializable {
private String original; private String original;
private String inverse; private String inverse;

View File

@ -1,19 +1,18 @@
package eu.dnetlib.scholexplorer.relation; package eu.dnetlib.scholexplorer.relation;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap; import java.util.HashMap;
import org.apache.commons.io.IOUtils;
public class RelationMapper extends HashMap<String,RelInfo > implements Serializable { public class RelationMapper extends HashMap<String, RelInfo> implements Serializable {
public static RelationMapper load() throws Exception { public static RelationMapper load() throws Exception {
final String json = IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json")); final String json =
IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(json, RelationMapper.class); return mapper.readValue(json, RelationMapper.class);
} }
} }

View File

@ -1,30 +1,46 @@
package eu.dnetlib.dhp.application; package eu.dnetlib.dhp.application;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
public class ArgumentApplicationParserTest { public class ArgumentApplicationParserTest {
@Test @Test
public void testParseParameter() throws Exception { public void testParseParameter() throws Exception {
final String jsonConfiguration = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); final String jsonConfiguration =
IOUtils.toString(
this.getClass()
.getResourceAsStream("/eu/dnetlib/application/parameters.json"));
assertNotNull(jsonConfiguration); assertNotNull(jsonConfiguration);
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(new String[]{"-p", "value0", parser.parseArgument(
"-a", "value1", new String[] {
"-n", "value2", "-p",
"-u", "value3", "value0",
"-ru", "value4", "-a",
"-rp", "value5", "value1",
"-rh", "value6", "-n",
"-ro", "value7", "value2",
"-rr", "value8", "-u",
"-w", "value9", "value3",
"-cc", ArgumentApplicationParser.compressArgument(jsonConfiguration) "-ru",
}); "value4",
"-rp",
"value5",
"-rh",
"value6",
"-ro",
"value7",
"-rr",
"value8",
"-w",
"value9",
"-cc",
ArgumentApplicationParser.compressArgument(jsonConfiguration)
});
assertNotNull(parser.get("hdfsPath")); assertNotNull(parser.get("hdfsPath"));
assertNotNull(parser.get("apidescriptor")); assertNotNull(parser.get("apidescriptor"));
assertNotNull(parser.get("namenode")); assertNotNull(parser.get("namenode"));
@ -47,10 +63,4 @@ public class ArgumentApplicationParserTest {
assertEquals("value9", parser.get("workflowId")); assertEquals("value9", parser.get("workflowId"));
assertEquals(jsonConfiguration, parser.get("ccCoco")); assertEquals(jsonConfiguration, parser.get("ccCoco"));
} }
} }

View File

@ -1,9 +1,6 @@
package eu.dnetlib.dhp.common; package eu.dnetlib.dhp.common;
import org.apache.hadoop.conf.Configuration; import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
@ -11,8 +8,10 @@ import java.nio.file.Path;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
public class HdfsSupportTest { public class HdfsSupportTest {
@ -22,8 +21,8 @@ public class HdfsSupportTest {
@Test @Test
public void shouldThrowARuntimeExceptionOnError() { public void shouldThrowARuntimeExceptionOnError() {
// when // when
assertThrows(RuntimeException.class, () -> assertThrows(
HdfsSupport.remove(null, new Configuration())); RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
} }
@Test @Test
@ -54,8 +53,8 @@ public class HdfsSupportTest {
@Test @Test
public void shouldThrowARuntimeExceptionOnError() { public void shouldThrowARuntimeExceptionOnError() {
// when // when
assertThrows(RuntimeException.class, () -> assertThrows(
HdfsSupport.listFiles(null, new Configuration())); RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
} }
@Test @Test
@ -68,8 +67,10 @@ public class HdfsSupportTest {
// then // then
assertEquals(2, paths.size()); assertEquals(2, paths.size());
List<String> expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()}) List<String> expecteds =
.sorted().collect(Collectors.toList()); Arrays.stream(new String[] {subDir1.toString(), subDir2.toString()})
.sorted()
.collect(Collectors.toList());
List<String> actuals = paths.stream().sorted().collect(Collectors.toList()); List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
assertTrue(actuals.get(0).contains(expecteds.get(0))); assertTrue(actuals.get(0).contains(expecteds.get(0)));
assertTrue(actuals.get(1).contains(expecteds.get(1))); assertTrue(actuals.get(1).contains(expecteds.get(1)));

View File

@ -1,22 +1,22 @@
package eu.dnetlib.dhp.common; package eu.dnetlib.dhp.common;
import static org.mockito.Mockito.*;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
import java.util.function.Function;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.util.function.Function;
import static org.mockito.Mockito.*;
public class SparkSessionSupportTest { public class SparkSessionSupportTest {
@Nested @Nested
class RunWithSparkSession { class RunWithSparkSession {
@Test @Test
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception { public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
throws Exception {
// given // given
SparkSession spark = mock(SparkSession.class); SparkSession spark = mock(SparkSession.class);
SparkConf conf = mock(SparkConf.class); SparkConf conf = mock(SparkConf.class);
@ -34,7 +34,8 @@ public class SparkSessionSupportTest {
} }
@Test @Test
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception { public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
throws Exception {
// given // given
SparkSession spark = mock(SparkSession.class); SparkSession spark = mock(SparkSession.class);
SparkConf conf = mock(SparkConf.class); SparkConf conf = mock(SparkConf.class);

View File

@ -1,9 +1,9 @@
package eu.dnetlib.dhp.model.mdstore; package eu.dnetlib.dhp.model.mdstore;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
public class MetadataRecordTest { public class MetadataRecordTest {
@Test @Test

View File

@ -1,12 +1,11 @@
package eu.dnetlib.message; package eu.dnetlib.message;
import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
public class MessageTest { public class MessageTest {
@ -16,7 +15,7 @@ public class MessageTest {
m.setWorkflowId("wId"); m.setWorkflowId("wId");
m.setType(MessageType.ONGOING); m.setType(MessageType.ONGOING);
m.setJobName("Collection"); m.setJobName("Collection");
Map<String,String> body= new HashMap<>(); Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300"); body.put("parsedItem", "300");
body.put("ExecutionTime", "30s"); body.put("ExecutionTime", "30s");
@ -28,28 +27,26 @@ public class MessageTest {
assertEquals(m1.getJobName(), m.getJobName()); assertEquals(m1.getJobName(), m.getJobName());
assertNotNull(m1.getBody()); assertNotNull(m1.getBody());
m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it))); m1.getBody()
.keySet()
.forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
assertEquals(m1.getJobName(), m.getJobName()); assertEquals(m1.getJobName(), m.getJobName());
} }
@Test @Test
public void toStringTest() { public void toStringTest() {
final String expectedJson= "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}"; final String expectedJson =
"{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
Message m = new Message(); Message m = new Message();
m.setWorkflowId("wId"); m.setWorkflowId("wId");
m.setType(MessageType.ONGOING); m.setType(MessageType.ONGOING);
m.setJobName("Collection"); m.setJobName("Collection");
Map<String,String> body= new HashMap<>(); Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300"); body.put("parsedItem", "300");
body.put("ExecutionTime", "30s"); body.put("ExecutionTime", "30s");
m.setBody(body); m.setBody(body);
assertEquals(expectedJson,m.toString()); assertEquals(expectedJson, m.toString());
} }
} }

View File

@ -2,14 +2,12 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class RelationMapperTest { public class RelationMapperTest {
@Test @Test
public void testLoadRels() throws Exception{ public void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load(); RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println); relationMapper.keySet().forEach(System.out::println);
} }
} }

View File

@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.action;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.io.Serializable; import java.io.Serializable;
@JsonDeserialize(using = AtomicActionDeserializer.class) @JsonDeserialize(using = AtomicActionDeserializer.class)
@ -12,8 +11,7 @@ public class AtomicAction<T extends Oaf> implements Serializable {
private T payload; private T payload;
/** Creates an action without setting any field. */
public AtomicAction() {
    super();
}
public AtomicAction(Class<T> clazz, T payload) { public AtomicAction(Class<T> clazz, T payload) {
this.clazz = clazz; this.clazz = clazz;

View File

@ -7,13 +7,13 @@ import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.io.IOException; import java.io.IOException;
public class AtomicActionDeserializer extends JsonDeserializer { public class AtomicActionDeserializer extends JsonDeserializer {
@Override @Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException { public Object deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException, JsonProcessingException {
JsonNode node = jp.getCodec().readTree(jp); JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText(); String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload"); JsonNode payload = node.get("payload");

View File

@ -2,15 +2,19 @@ package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
/** /** Actual entity types in the Graph */
* Actual entity types in the Graph
*/
public enum EntityType { public enum EntityType {
publication,
publication, dataset, otherresearchproduct, software, datasource, organization, project; dataset,
otherresearchproduct,
software,
datasource,
organization,
project;
/** /**
* Resolves the EntityType, given the relative class name * Resolves the EntityType, given the relative class name
*
* @param clazz the given class name * @param clazz the given class name
* @param <T> actual OafEntity subclass * @param <T> actual OafEntity subclass
* @return the EntityType associated to the given class * @return the EntityType associated to the given class
@ -19,5 +23,4 @@ public enum EntityType {
return EntityType.valueOf(clazz.getSimpleName().toLowerCase()); return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
} }
} }

View File

@ -1,9 +1,9 @@
package eu.dnetlib.dhp.schema.common; package eu.dnetlib.dhp.schema.common;
/**
 * Main entity types in the Graph.
 *
 * <p>Declares, in this order, the constants {@code result}, {@code datasource},
 * {@code organization} and {@code project}.
 */
public enum MainEntityType {
    result, datasource, organization, project;
}

View File

@ -2,33 +2,29 @@ package eu.dnetlib.dhp.schema.common;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Map; import java.util.Map;
/** /** Oaf model utility methods. */
* Oaf model utility methods.
*/
public class ModelSupport { public class ModelSupport {
/** /** Defines the mapping between the actual entity type and the main entity type */
* Defines the mapping between the actual entity type and the main entity type
*/
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap(); private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
static { static {
entityMapping.put(EntityType.publication, MainEntityType.result); entityMapping.put(EntityType.publication, MainEntityType.result);
entityMapping.put(EntityType.dataset, MainEntityType.result); entityMapping.put(EntityType.dataset, MainEntityType.result);
entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result); entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
entityMapping.put(EntityType.software, MainEntityType.result); entityMapping.put(EntityType.software, MainEntityType.result);
entityMapping.put(EntityType.datasource, MainEntityType.datasource); entityMapping.put(EntityType.datasource, MainEntityType.datasource);
entityMapping.put(EntityType.organization, MainEntityType.organization); entityMapping.put(EntityType.organization, MainEntityType.organization);
entityMapping.put(EntityType.project, MainEntityType.project); entityMapping.put(EntityType.project, MainEntityType.project);
} }
/** /**
* Defines the mapping between the actual entity types and the relative classes implementing them * Defines the mapping between the actual entity types and the relative classes implementing
* them
*/ */
public final static Map<EntityType, Class> entityTypes = Maps.newHashMap(); public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
static { static {
entityTypes.put(EntityType.datasource, Datasource.class); entityTypes.put(EntityType.datasource, Datasource.class);
@ -40,7 +36,7 @@ public class ModelSupport {
entityTypes.put(EntityType.publication, Publication.class); entityTypes.put(EntityType.publication, Publication.class);
} }
public final static Map<String, Class> oafTypes = Maps.newHashMap(); public static final Map<String, Class> oafTypes = Maps.newHashMap();
static { static {
oafTypes.put("datasource", Datasource.class); oafTypes.put("datasource", Datasource.class);
@ -55,19 +51,19 @@ public class ModelSupport {
private static final String schemeTemplate = "dnet:%s_%s_relations"; private static final String schemeTemplate = "dnet:%s_%s_relations";
private ModelSupport() { private ModelSupport() {}
}
/** /**
* Checks subclass-superclass relationship. * Checks subclass-superclass relationship.
* *
* @param subClazzObject Subclass object instance * @param subClazzObject Subclass object instance
* @param superClazzObject Superclass object instance * @param superClazzObject Superclass object instance
* @param <X> Subclass type * @param <X> Subclass type
* @param <Y> Superclass type * @param <Y> Superclass type
* @return True if X is a subclass of Y * @return True if X is a subclass of Y
*/ */
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Y superClazzObject) { public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Y superClazzObject) {
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
} }
@ -75,25 +71,27 @@ public class ModelSupport {
* Checks subclass-superclass relationship. * Checks subclass-superclass relationship.
* *
* @param subClazzObject Subclass object instance * @param subClazzObject Subclass object instance
* @param superClazz Superclass class * @param superClazz Superclass class
* @param <X> Subclass type * @param <X> Subclass type
* @param <Y> Superclass type * @param <Y> Superclass type
* @return True if X is a subclass of Y * @return True if X is a subclass of Y
*/ */
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Class<Y> superClazz) { public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Class<Y> superClazz) {
return isSubClass(subClazzObject.getClass(), superClazz); return isSubClass(subClazzObject.getClass(), superClazz);
} }
/** /**
* Checks subclass-superclass relationship. * Checks subclass-superclass relationship.
* *
* @param subClazz Subclass class * @param subClazz Subclass class
* @param superClazz Superclass class * @param superClazz Superclass class
* @param <X> Subclass type * @param <X> Subclass type
* @param <Y> Superclass type * @param <Y> Superclass type
* @return True if X is a subclass of Y * @return True if X is a subclass of Y
*/ */
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(Class<X> subClazz, Class<Y> superClazz) { public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
Class<X> subClazz, Class<Y> superClazz) {
return superClazz.isAssignableFrom(subClazz); return superClazz.isAssignableFrom(subClazz);
} }
@ -104,33 +102,33 @@ public class ModelSupport {
* @return * @return
*/ */
public static <T extends Oaf> Class<T>[] getOafModelClasses() { public static <T extends Oaf> Class<T>[] getOafModelClasses() {
return new Class[]{ return new Class[] {
Author.class, Author.class,
Context.class, Context.class,
Country.class, Country.class,
DataInfo.class, DataInfo.class,
Dataset.class, Dataset.class,
Datasource.class, Datasource.class,
ExternalReference.class, ExternalReference.class,
ExtraInfo.class, ExtraInfo.class,
Field.class, Field.class,
GeoLocation.class, GeoLocation.class,
Instance.class, Instance.class,
Journal.class, Journal.class,
KeyValue.class, KeyValue.class,
Oaf.class, Oaf.class,
OafEntity.class, OafEntity.class,
OAIProvenance.class, OAIProvenance.class,
Organization.class, Organization.class,
OriginDescription.class, OriginDescription.class,
OtherResearchProduct.class, OtherResearchProduct.class,
Project.class, Project.class,
Publication.class, Publication.class,
Qualifier.class, Qualifier.class,
Relation.class, Relation.class,
Result.class, Result.class,
Software.class, Software.class,
StructuredProperty.class StructuredProperty.class
}; };
} }
@ -143,9 +141,9 @@ public class ModelSupport {
} }
public static String getScheme(final String sourceType, final String targetType) { public static String getScheme(final String sourceType, final String targetType) {
return String.format(schemeTemplate, return String.format(
schemeTemplate,
entityMapping.get(EntityType.valueOf(sourceType)).name(), entityMapping.get(EntityType.valueOf(sourceType)).name(),
entityMapping.get(EntityType.valueOf(targetType)).name()); entityMapping.get(EntityType.valueOf(targetType)).name());
} }
} }

View File

@ -71,12 +71,12 @@ public class Author implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
Author author = (Author) o; Author author = (Author) o;
return Objects.equals(fullname, author.fullname) && return Objects.equals(fullname, author.fullname)
Objects.equals(name, author.name) && && Objects.equals(name, author.name)
Objects.equals(surname, author.surname) && && Objects.equals(surname, author.surname)
Objects.equals(rank, author.rank) && && Objects.equals(rank, author.rank)
Objects.equals(pid, author.pid) && && Objects.equals(pid, author.pid)
Objects.equals(affiliation, author.affiliation); && Objects.equals(affiliation, author.affiliation);
} }
@Override @Override

View File

@ -26,17 +26,14 @@ public class Context implements Serializable {
@Override @Override
public int hashCode() { public int hashCode() {
return id ==null? 0 : id.hashCode(); return id == null ? 0 : id.hashCode();
} }
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
Context other = (Context) obj; Context other = (Context) obj;

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
import java.util.List;
import java.util.Objects; import java.util.Objects;
public class DataInfo implements Serializable { public class DataInfo implements Serializable {
@ -13,7 +12,6 @@ public class DataInfo implements Serializable {
private String inferenceprovenance; private String inferenceprovenance;
private Qualifier provenanceaction; private Qualifier provenanceaction;
public Boolean getInvisible() { public Boolean getInvisible() {
return invisible; return invisible;
} }
@ -67,16 +65,22 @@ public class DataInfo implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
DataInfo dataInfo = (DataInfo) o; DataInfo dataInfo = (DataInfo) o;
return Objects.equals(invisible, dataInfo.invisible) && return Objects.equals(invisible, dataInfo.invisible)
Objects.equals(inferred, dataInfo.inferred) && && Objects.equals(inferred, dataInfo.inferred)
Objects.equals(deletedbyinference, dataInfo.deletedbyinference) && && Objects.equals(deletedbyinference, dataInfo.deletedbyinference)
Objects.equals(trust, dataInfo.trust) && && Objects.equals(trust, dataInfo.trust)
Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance) && && Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance)
Objects.equals(provenanceaction, dataInfo.provenanceaction); && Objects.equals(provenanceaction, dataInfo.provenanceaction);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(invisible, inferred, deletedbyinference, trust, inferenceprovenance, provenanceaction); return Objects.hash(
invisible,
inferred,
deletedbyinference,
trust,
inferenceprovenance,
provenanceaction);
} }
} }

View File

@ -20,7 +20,7 @@ public class Dataset extends Result implements Serializable {
private List<GeoLocation> geolocation; private List<GeoLocation> geolocation;
public Field<String> getStoragedate() { public Field<String> getStoragedate() {
return storagedate; return storagedate;
} }
@ -80,23 +80,32 @@ public class Dataset extends Result implements Serializable {
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Dataset.class.isAssignableFrom(e.getClass())){ if (!Dataset.class.isAssignableFrom(e.getClass())) {
return; return;
} }
final Dataset d = (Dataset) e; final Dataset d = (Dataset) e;
storagedate = d.getStoragedate() != null && compareTrust(this, e)<0? d.getStoragedate() : storagedate; storagedate =
d.getStoragedate() != null && compareTrust(this, e) < 0
? d.getStoragedate()
: storagedate;
device= d.getDevice() != null && compareTrust(this, e)<0? d.getDevice() : device; device = d.getDevice() != null && compareTrust(this, e) < 0 ? d.getDevice() : device;
size= d.getSize() != null && compareTrust(this, e)<0? d.getSize() : size; size = d.getSize() != null && compareTrust(this, e) < 0 ? d.getSize() : size;
version= d.getVersion() != null && compareTrust(this, e)<0? d.getVersion() : version; version = d.getVersion() != null && compareTrust(this, e) < 0 ? d.getVersion() : version;
lastmetadataupdate= d.getLastmetadataupdate() != null && compareTrust(this, e)<0? d.getLastmetadataupdate() :lastmetadataupdate; lastmetadataupdate =
d.getLastmetadataupdate() != null && compareTrust(this, e) < 0
? d.getLastmetadataupdate()
: lastmetadataupdate;
metadataversionnumber= d.getMetadataversionnumber() != null && compareTrust(this, e)<0? d.getMetadataversionnumber() : metadataversionnumber; metadataversionnumber =
d.getMetadataversionnumber() != null && compareTrust(this, e) < 0
? d.getMetadataversionnumber()
: metadataversionnumber;
geolocation = mergeLists(geolocation, d.getGeolocation()); geolocation = mergeLists(geolocation, d.getGeolocation());
@ -109,17 +118,25 @@ public class Dataset extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
Dataset dataset = (Dataset) o; Dataset dataset = (Dataset) o;
return Objects.equals(storagedate, dataset.storagedate) && return Objects.equals(storagedate, dataset.storagedate)
Objects.equals(device, dataset.device) && && Objects.equals(device, dataset.device)
Objects.equals(size, dataset.size) && && Objects.equals(size, dataset.size)
Objects.equals(version, dataset.version) && && Objects.equals(version, dataset.version)
Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate) && && Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate)
Objects.equals(metadataversionnumber, dataset.metadataversionnumber) && && Objects.equals(metadataversionnumber, dataset.metadataversionnumber)
Objects.equals(geolocation, dataset.geolocation); && Objects.equals(geolocation, dataset.geolocation);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), storagedate, device, size, version, lastmetadataupdate, metadataversionnumber, geolocation); return Objects.hash(
super.hashCode(),
storagedate,
device,
size,
version,
lastmetadataupdate,
metadataversionnumber,
geolocation);
} }
} }

View File

@ -72,7 +72,7 @@ public class Datasource extends OafEntity implements Serializable {
private Field<String> citationguidelineurl; private Field<String> citationguidelineurl;
//{yes, no, unknown} // {yes, no, unknown}
private Field<String> qualitymanagementkind; private Field<String> qualitymanagementkind;
private Field<String> pidsystems; private Field<String> pidsystems;
@ -367,65 +367,148 @@ public class Datasource extends OafEntity implements Serializable {
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Datasource.class.isAssignableFrom(e.getClass())){ if (!Datasource.class.isAssignableFrom(e.getClass())) {
return; return;
} }
Datasource d = (Datasource)e; Datasource d = (Datasource) e;
datasourcetype = d.getDatasourcetype() != null && compareTrust(this, e)<0? d.getDatasourcetype() : datasourcetype; datasourcetype =
openairecompatibility = d.getOpenairecompatibility() != null && compareTrust(this, e)<0? d.getOpenairecompatibility() : openairecompatibility; d.getDatasourcetype() != null && compareTrust(this, e) < 0
officialname = d.getOfficialname() != null && compareTrust(this, e)<0? d.getOfficialname() : officialname; ? d.getDatasourcetype()
englishname = d.getEnglishname() != null && compareTrust(this, e)<0? d.getEnglishname() : officialname; : datasourcetype;
websiteurl = d.getWebsiteurl() != null && compareTrust(this, e)<0? d.getWebsiteurl() : websiteurl; openairecompatibility =
logourl = d.getLogourl() != null && compareTrust(this, e)<0? d.getLogourl() : getLogourl(); d.getOpenairecompatibility() != null && compareTrust(this, e) < 0
contactemail = d.getContactemail() != null && compareTrust(this, e)<0? d.getContactemail() : contactemail; ? d.getOpenairecompatibility()
namespaceprefix = d.getNamespaceprefix() != null && compareTrust(this, e)<0? d.getNamespaceprefix() : namespaceprefix; : openairecompatibility;
latitude = d.getLatitude() != null && compareTrust(this, e)<0? d.getLatitude() : latitude; officialname =
longitude = d.getLongitude() != null && compareTrust(this, e)<0? d.getLongitude() : longitude; d.getOfficialname() != null && compareTrust(this, e) < 0
dateofvalidation = d.getDateofvalidation() != null && compareTrust(this, e)<0? d.getDateofvalidation() : dateofvalidation; ? d.getOfficialname()
description = d.getDescription() != null && compareTrust(this, e)<0? d.getDescription() : description; : officialname;
englishname =
d.getEnglishname() != null && compareTrust(this, e) < 0
? d.getEnglishname()
: officialname;
websiteurl =
d.getWebsiteurl() != null && compareTrust(this, e) < 0
? d.getWebsiteurl()
: websiteurl;
logourl =
d.getLogourl() != null && compareTrust(this, e) < 0 ? d.getLogourl() : getLogourl();
contactemail =
d.getContactemail() != null && compareTrust(this, e) < 0
? d.getContactemail()
: contactemail;
namespaceprefix =
d.getNamespaceprefix() != null && compareTrust(this, e) < 0
? d.getNamespaceprefix()
: namespaceprefix;
latitude =
d.getLatitude() != null && compareTrust(this, e) < 0 ? d.getLatitude() : latitude;
longitude =
d.getLongitude() != null && compareTrust(this, e) < 0
? d.getLongitude()
: longitude;
dateofvalidation =
d.getDateofvalidation() != null && compareTrust(this, e) < 0
? d.getDateofvalidation()
: dateofvalidation;
description =
d.getDescription() != null && compareTrust(this, e) < 0
? d.getDescription()
: description;
subjects = mergeLists(subjects, d.getSubjects()); subjects = mergeLists(subjects, d.getSubjects());
// opendoar specific fields (od*) // opendoar specific fields (od*)
odnumberofitems = d.getOdnumberofitems() != null && compareTrust(this, e)<0? d.getOdnumberofitems() : odnumberofitems; odnumberofitems =
odnumberofitemsdate = d.getOdnumberofitemsdate() != null && compareTrust(this, e)<0? d.getOdnumberofitemsdate() : odnumberofitemsdate; d.getOdnumberofitems() != null && compareTrust(this, e) < 0
odpolicies = d.getOdpolicies() != null && compareTrust(this, e)<0? d.getOdpolicies() : odpolicies; ? d.getOdnumberofitems()
: odnumberofitems;
odnumberofitemsdate =
d.getOdnumberofitemsdate() != null && compareTrust(this, e) < 0
? d.getOdnumberofitemsdate()
: odnumberofitemsdate;
odpolicies =
d.getOdpolicies() != null && compareTrust(this, e) < 0
? d.getOdpolicies()
: odpolicies;
odlanguages = mergeLists(odlanguages, d.getOdlanguages()); odlanguages = mergeLists(odlanguages, d.getOdlanguages());
odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes()); odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes());
accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage()); accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage());
// re3data fields // re3data fields
releasestartdate = d.getReleasestartdate() != null && compareTrust(this, e)<0? d.getReleasestartdate() : releasestartdate; releasestartdate =
releaseenddate = d.getReleaseenddate() != null && compareTrust(this, e)<0? d.getReleaseenddate() : releaseenddate; d.getReleasestartdate() != null && compareTrust(this, e) < 0
missionstatementurl = d.getMissionstatementurl() != null && compareTrust(this, e)<0? d.getMissionstatementurl() : missionstatementurl; ? d.getReleasestartdate()
dataprovider = d.getDataprovider() != null && compareTrust(this, e)<0? d.getDataprovider() : dataprovider; : releasestartdate;
serviceprovider = d.getServiceprovider() != null && compareTrust(this, e)<0? d.getServiceprovider() : serviceprovider; releaseenddate =
d.getReleaseenddate() != null && compareTrust(this, e) < 0
? d.getReleaseenddate()
: releaseenddate;
missionstatementurl =
d.getMissionstatementurl() != null && compareTrust(this, e) < 0
? d.getMissionstatementurl()
: missionstatementurl;
dataprovider =
d.getDataprovider() != null && compareTrust(this, e) < 0
? d.getDataprovider()
: dataprovider;
serviceprovider =
d.getServiceprovider() != null && compareTrust(this, e) < 0
? d.getServiceprovider()
: serviceprovider;
// {open, restricted or closed} // {open, restricted or closed}
databaseaccesstype = d.getDatabaseaccesstype() != null && compareTrust(this, e)<0? d.getDatabaseaccesstype() : databaseaccesstype; databaseaccesstype =
d.getDatabaseaccesstype() != null && compareTrust(this, e) < 0
? d.getDatabaseaccesstype()
: databaseaccesstype;
// {open, restricted or closed} // {open, restricted or closed}
datauploadtype = d.getDatauploadtype() != null && compareTrust(this, e)<0? d.getDatauploadtype() : datauploadtype; datauploadtype =
d.getDatauploadtype() != null && compareTrust(this, e) < 0
? d.getDatauploadtype()
: datauploadtype;
// {feeRequired, registration, other} // {feeRequired, registration, other}
databaseaccessrestriction = d.getDatabaseaccessrestriction() != null && compareTrust(this, e)<0? d.getDatabaseaccessrestriction() : databaseaccessrestriction; databaseaccessrestriction =
d.getDatabaseaccessrestriction() != null && compareTrust(this, e) < 0
? d.getDatabaseaccessrestriction()
: databaseaccessrestriction;
// {feeRequired, registration, other} // {feeRequired, registration, other}
datauploadrestriction = d.getDatauploadrestriction() != null && compareTrust(this, e)<0? d.getDatauploadrestriction() : datauploadrestriction; datauploadrestriction =
d.getDatauploadrestriction() != null && compareTrust(this, e) < 0
? d.getDatauploadrestriction()
: datauploadrestriction;
versioning = d.getVersioning() != null && compareTrust(this, e)<0? d.getVersioning() : versioning; versioning =
citationguidelineurl = d.getCitationguidelineurl() != null && compareTrust(this, e)<0? d.getCitationguidelineurl() : citationguidelineurl; d.getVersioning() != null && compareTrust(this, e) < 0
? d.getVersioning()
: versioning;
citationguidelineurl =
d.getCitationguidelineurl() != null && compareTrust(this, e) < 0
? d.getCitationguidelineurl()
: citationguidelineurl;
//{yes, no, unknown} // {yes, no, unknown}
qualitymanagementkind = d.getQualitymanagementkind() != null && compareTrust(this, e)<0? d.getQualitymanagementkind() : qualitymanagementkind; qualitymanagementkind =
pidsystems = d.getPidsystems() != null && compareTrust(this, e)<0? d.getPidsystems() : pidsystems; d.getQualitymanagementkind() != null && compareTrust(this, e) < 0
? d.getQualitymanagementkind()
: qualitymanagementkind;
pidsystems =
d.getPidsystems() != null && compareTrust(this, e) < 0
? d.getPidsystems()
: pidsystems;
certificates = d.getCertificates() != null && compareTrust(this, e)<0? d.getCertificates() : certificates; certificates =
d.getCertificates() != null && compareTrust(this, e) < 0
? d.getCertificates()
: certificates;
policies = mergeLists(policies, d.getPolicies()); policies = mergeLists(policies, d.getPolicies());
journal = d.getJournal() != null && compareTrust(this, e)<0? d.getJournal() : journal; journal = d.getJournal() != null && compareTrust(this, e) < 0 ? d.getJournal() : journal;
mergeOAFDataInfo(e); mergeOAFDataInfo(e);
} }
@ -436,45 +519,81 @@ public class Datasource extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
Datasource that = (Datasource) o; Datasource that = (Datasource) o;
return Objects.equals(datasourcetype, that.datasourcetype) && return Objects.equals(datasourcetype, that.datasourcetype)
Objects.equals(openairecompatibility, that.openairecompatibility) && && Objects.equals(openairecompatibility, that.openairecompatibility)
Objects.equals(officialname, that.officialname) && && Objects.equals(officialname, that.officialname)
Objects.equals(englishname, that.englishname) && && Objects.equals(englishname, that.englishname)
Objects.equals(websiteurl, that.websiteurl) && && Objects.equals(websiteurl, that.websiteurl)
Objects.equals(logourl, that.logourl) && && Objects.equals(logourl, that.logourl)
Objects.equals(contactemail, that.contactemail) && && Objects.equals(contactemail, that.contactemail)
Objects.equals(namespaceprefix, that.namespaceprefix) && && Objects.equals(namespaceprefix, that.namespaceprefix)
Objects.equals(latitude, that.latitude) && && Objects.equals(latitude, that.latitude)
Objects.equals(longitude, that.longitude) && && Objects.equals(longitude, that.longitude)
Objects.equals(dateofvalidation, that.dateofvalidation) && && Objects.equals(dateofvalidation, that.dateofvalidation)
Objects.equals(description, that.description) && && Objects.equals(description, that.description)
Objects.equals(subjects, that.subjects) && && Objects.equals(subjects, that.subjects)
Objects.equals(odnumberofitems, that.odnumberofitems) && && Objects.equals(odnumberofitems, that.odnumberofitems)
Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate) && && Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate)
Objects.equals(odpolicies, that.odpolicies) && && Objects.equals(odpolicies, that.odpolicies)
Objects.equals(odlanguages, that.odlanguages) && && Objects.equals(odlanguages, that.odlanguages)
Objects.equals(odcontenttypes, that.odcontenttypes) && && Objects.equals(odcontenttypes, that.odcontenttypes)
Objects.equals(accessinfopackage, that.accessinfopackage) && && Objects.equals(accessinfopackage, that.accessinfopackage)
Objects.equals(releasestartdate, that.releasestartdate) && && Objects.equals(releasestartdate, that.releasestartdate)
Objects.equals(releaseenddate, that.releaseenddate) && && Objects.equals(releaseenddate, that.releaseenddate)
Objects.equals(missionstatementurl, that.missionstatementurl) && && Objects.equals(missionstatementurl, that.missionstatementurl)
Objects.equals(dataprovider, that.dataprovider) && && Objects.equals(dataprovider, that.dataprovider)
Objects.equals(serviceprovider, that.serviceprovider) && && Objects.equals(serviceprovider, that.serviceprovider)
Objects.equals(databaseaccesstype, that.databaseaccesstype) && && Objects.equals(databaseaccesstype, that.databaseaccesstype)
Objects.equals(datauploadtype, that.datauploadtype) && && Objects.equals(datauploadtype, that.datauploadtype)
Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction) && && Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction)
Objects.equals(datauploadrestriction, that.datauploadrestriction) && && Objects.equals(datauploadrestriction, that.datauploadrestriction)
Objects.equals(versioning, that.versioning) && && Objects.equals(versioning, that.versioning)
Objects.equals(citationguidelineurl, that.citationguidelineurl) && && Objects.equals(citationguidelineurl, that.citationguidelineurl)
Objects.equals(qualitymanagementkind, that.qualitymanagementkind) && && Objects.equals(qualitymanagementkind, that.qualitymanagementkind)
Objects.equals(pidsystems, that.pidsystems) && && Objects.equals(pidsystems, that.pidsystems)
Objects.equals(certificates, that.certificates) && && Objects.equals(certificates, that.certificates)
Objects.equals(policies, that.policies) && && Objects.equals(policies, that.policies)
Objects.equals(journal, that.journal); && Objects.equals(journal, that.journal);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), datasourcetype, openairecompatibility, officialname, englishname, websiteurl, logourl, contactemail, namespaceprefix, latitude, longitude, dateofvalidation, description, subjects, odnumberofitems, odnumberofitemsdate, odpolicies, odlanguages, odcontenttypes, accessinfopackage, releasestartdate, releaseenddate, missionstatementurl, dataprovider, serviceprovider, databaseaccesstype, datauploadtype, databaseaccessrestriction, datauploadrestriction, versioning, citationguidelineurl, qualitymanagementkind, pidsystems, certificates, policies, journal); return Objects.hash(
super.hashCode(),
datasourcetype,
openairecompatibility,
officialname,
englishname,
websiteurl,
logourl,
contactemail,
namespaceprefix,
latitude,
longitude,
dateofvalidation,
description,
subjects,
odnumberofitems,
odnumberofitemsdate,
odpolicies,
odlanguages,
odcontenttypes,
accessinfopackage,
releasestartdate,
releaseenddate,
missionstatementurl,
dataprovider,
serviceprovider,
databaseaccesstype,
datauploadtype,
databaseaccessrestriction,
datauploadrestriction,
versioning,
citationguidelineurl,
qualitymanagementkind,
pidsystems,
certificates,
policies,
journal);
} }
} }

View File

@ -97,18 +97,19 @@ public class ExternalReference implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
ExternalReference that = (ExternalReference) o; ExternalReference that = (ExternalReference) o;
return Objects.equals(sitename, that.sitename) && return Objects.equals(sitename, that.sitename)
Objects.equals(label, that.label) && && Objects.equals(label, that.label)
Objects.equals(url, that.url) && && Objects.equals(url, that.url)
Objects.equals(description, that.description) && && Objects.equals(description, that.description)
Objects.equals(qualifier, that.qualifier) && && Objects.equals(qualifier, that.qualifier)
Objects.equals(refidentifier, that.refidentifier) && && Objects.equals(refidentifier, that.refidentifier)
Objects.equals(query, that.query) && && Objects.equals(query, that.query)
Objects.equals(dataInfo, that.dataInfo); && Objects.equals(dataInfo, that.dataInfo);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(sitename, label, url, description, qualifier, refidentifier, query, dataInfo); return Objects.hash(
sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
} }
} }

View File

@ -60,11 +60,11 @@ public class ExtraInfo implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
ExtraInfo extraInfo = (ExtraInfo) o; ExtraInfo extraInfo = (ExtraInfo) o;
return Objects.equals(name, extraInfo.name) && return Objects.equals(name, extraInfo.name)
Objects.equals(typology, extraInfo.typology) && && Objects.equals(typology, extraInfo.typology)
Objects.equals(provenance, extraInfo.provenance) && && Objects.equals(provenance, extraInfo.provenance)
Objects.equals(trust, extraInfo.trust) && && Objects.equals(trust, extraInfo.trust)
Objects.equals(value, extraInfo.value); && Objects.equals(value, extraInfo.value);
} }
@Override @Override

View File

@ -31,12 +31,9 @@ public class Field<T> implements Serializable {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
Field<T> other = (Field<T>) obj; Field<T> other = (Field<T>) obj;
return getValue().equals(other.getValue()); return getValue().equals(other.getValue());
} }

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable; import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class GeoLocation implements Serializable { public class GeoLocation implements Serializable {
@ -39,13 +38,17 @@ public class GeoLocation implements Serializable {
@JsonIgnore @JsonIgnore
public boolean isBlank() { public boolean isBlank() {
return StringUtils.isBlank(point) && return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
StringUtils.isBlank(box) &&
StringUtils.isBlank(place);
} }
public String toComparableString() { public String toComparableString() {
return isBlank()?"":String.format("%s::%s%s", point != null ? point.toLowerCase() : "", box != null ? box.toLowerCase() : "", place != null ? place.toLowerCase() : ""); return isBlank()
? ""
: String.format(
"%s::%s%s",
point != null ? point.toLowerCase() : "",
box != null ? box.toLowerCase() : "",
place != null ? place.toLowerCase() : "");
} }
@Override @Override
@ -55,16 +58,12 @@ public class GeoLocation implements Serializable {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
GeoLocation other = (GeoLocation) obj; GeoLocation other = (GeoLocation) obj;
return toComparableString() return toComparableString().equals(other.toComparableString());
.equals(other.toComparableString());
} }
} }

View File

@ -16,19 +16,21 @@ public class Instance implements Serializable {
private List<String> url; private List<String> url;
// other research products specifc // other research products specifc
private String distributionlocation; private String distributionlocation;
private KeyValue collectedfrom; private KeyValue collectedfrom;
private Field<String> dateofacceptance; private Field<String> dateofacceptance;
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed results // ( article | book ) processing charges. Defined here to cope with possible wrongly typed
// results
private Field<String> processingchargeamount; private Field<String> processingchargeamount;
// currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly typed results // currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly
// typed results
private Field<String> processingchargecurrency; private Field<String> processingchargecurrency;
private Field<String> refereed; //peer-review status private Field<String> refereed; // peer-review status
public Field<String> getLicense() { public Field<String> getLicense() {
return license; return license;
@ -118,12 +120,19 @@ public class Instance implements Serializable {
this.refereed = refereed; this.refereed = refereed;
} }
public String toComparableString(){ public String toComparableString() {
return String.format("%s::%s::%s::%s", return String.format(
hostedby != null && hostedby.getKey()!= null ? hostedby.getKey().toLowerCase() : "", "%s::%s::%s::%s",
accessright!= null && accessright.getClassid()!= null ? accessright.getClassid() : "", hostedby != null && hostedby.getKey() != null
instancetype!= null && instancetype.getClassid()!= null ? instancetype.getClassid() : "", ? hostedby.getKey().toLowerCase()
url != null ? url:""); : "",
accessright != null && accessright.getClassid() != null
? accessright.getClassid()
: "",
instancetype != null && instancetype.getClassid() != null
? instancetype.getClassid()
: "",
url != null ? url : "");
} }
@Override @Override
@ -133,16 +142,12 @@ public class Instance implements Serializable {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
Instance other = (Instance) obj; Instance other = (Instance) obj;
return toComparableString() return toComparableString().equals(other.toComparableString());
.equals(other.toComparableString());
} }
} }

View File

@ -130,22 +130,34 @@ public class Journal implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
Journal journal = (Journal) o; Journal journal = (Journal) o;
return Objects.equals(name, journal.name) && return Objects.equals(name, journal.name)
Objects.equals(issnPrinted, journal.issnPrinted) && && Objects.equals(issnPrinted, journal.issnPrinted)
Objects.equals(issnOnline, journal.issnOnline) && && Objects.equals(issnOnline, journal.issnOnline)
Objects.equals(issnLinking, journal.issnLinking) && && Objects.equals(issnLinking, journal.issnLinking)
Objects.equals(ep, journal.ep) && && Objects.equals(ep, journal.ep)
Objects.equals(iss, journal.iss) && && Objects.equals(iss, journal.iss)
Objects.equals(sp, journal.sp) && && Objects.equals(sp, journal.sp)
Objects.equals(vol, journal.vol) && && Objects.equals(vol, journal.vol)
Objects.equals(edition, journal.edition) && && Objects.equals(edition, journal.edition)
Objects.equals(conferenceplace, journal.conferenceplace) && && Objects.equals(conferenceplace, journal.conferenceplace)
Objects.equals(conferencedate, journal.conferencedate) && && Objects.equals(conferencedate, journal.conferencedate)
Objects.equals(dataInfo, journal.dataInfo); && Objects.equals(dataInfo, journal.dataInfo);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, conferenceplace, conferencedate, dataInfo); return Objects.hash(
name,
issnPrinted,
issnOnline,
issnLinking,
ep,
iss,
sp,
vol,
edition,
conferenceplace,
conferencedate,
dataInfo);
} }
} }

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable; import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class KeyValue implements Serializable { public class KeyValue implements Serializable {
@ -38,7 +37,12 @@ public class KeyValue implements Serializable {
} }
public String toComparableString() { public String toComparableString() {
return isBlank()?"":String.format("%s::%s", key != null ? key.toLowerCase() : "", value != null ? value.toLowerCase() : ""); return isBlank()
? ""
: String.format(
"%s::%s",
key != null ? key.toLowerCase() : "",
value != null ? value.toLowerCase() : "");
} }
@JsonIgnore @JsonIgnore
@ -53,12 +57,9 @@ public class KeyValue implements Serializable {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
KeyValue other = (KeyValue) obj; KeyValue other = (KeyValue) obj;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects; import java.util.Objects;
public class OAIProvenance implements Serializable { public class OAIProvenance implements Serializable {
private OriginDescription originDescription; private OriginDescription originDescription;

View File

@ -1,14 +1,25 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
import java.util.List;
import java.util.Objects; import java.util.Objects;
public abstract class Oaf implements Serializable { public abstract class Oaf implements Serializable {
protected List<KeyValue> collectedfrom;
private DataInfo dataInfo; private DataInfo dataInfo;
private Long lastupdatetimestamp; private Long lastupdatetimestamp;
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public DataInfo getDataInfo() { public DataInfo getDataInfo() {
return dataInfo; return dataInfo;
} }
@ -25,24 +36,18 @@ public abstract class Oaf implements Serializable {
this.lastupdatetimestamp = lastupdatetimestamp; this.lastupdatetimestamp = lastupdatetimestamp;
} }
public void mergeOAFDataInfo(Oaf e) { public void mergeOAFDataInfo(Oaf e) {
if (e.getDataInfo()!= null && compareTrust(this,e)<0) if (e.getDataInfo() != null && compareTrust(this, e) < 0) dataInfo = e.getDataInfo();
dataInfo = e.getDataInfo();
} }
protected String extractTrust(Oaf e) { protected String extractTrust(Oaf e) {
if (e == null || e.getDataInfo()== null || e.getDataInfo().getTrust()== null) if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null)
return "0.0"; return "0.0";
return e.getDataInfo().getTrust(); return e.getDataInfo().getTrust();
} }
protected int compareTrust(Oaf a, Oaf b) { protected int compareTrust(Oaf a, Oaf b) {
return extractTrust(a).compareTo(extractTrust(b)); return extractTrust(a).compareTo(extractTrust(b));
} }
@Override @Override
@ -50,8 +55,8 @@ public abstract class Oaf implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
Oaf oaf = (Oaf) o; Oaf oaf = (Oaf) o;
return Objects.equals(dataInfo, oaf.dataInfo) && return Objects.equals(dataInfo, oaf.dataInfo)
Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp); && Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
} }
@Override @Override

View File

@ -10,8 +10,6 @@ public abstract class OafEntity extends Oaf implements Serializable {
private List<String> originalId; private List<String> originalId;
private List<KeyValue> collectedfrom;
private List<StructuredProperty> pid; private List<StructuredProperty> pid;
private String dateofcollection; private String dateofcollection;
@ -38,14 +36,6 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.originalId = originalId; this.originalId = originalId;
} }
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public List<StructuredProperty> getPid() { public List<StructuredProperty> getPid() {
return pid; return pid;
} }
@ -86,11 +76,9 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.oaiprovenance = oaiprovenance; this.oaiprovenance = oaiprovenance;
} }
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
if (e == null) if (e == null) return;
return;
originalId = mergeLists(originalId, e.getOriginalId()); originalId = mergeLists(originalId, e.getOriginalId());
@ -108,12 +96,15 @@ public abstract class OafEntity extends Oaf implements Serializable {
if (e.getOaiprovenance() != null && compareTrust(this, e) < 0) if (e.getOaiprovenance() != null && compareTrust(this, e) < 0)
oaiprovenance = e.getOaiprovenance(); oaiprovenance = e.getOaiprovenance();
} }
protected <T> List<T> mergeLists(final List<T>... lists) { protected <T> List<T> mergeLists(final List<T>... lists) {
return Arrays.stream(lists).filter(Objects::nonNull).flatMap(List::stream).distinct().collect(Collectors.toList()); return Arrays.stream(lists)
.filter(Objects::nonNull)
.flatMap(List::stream)
.distinct()
.collect(Collectors.toList());
} }
@Override @Override
@ -122,18 +113,27 @@ public abstract class OafEntity extends Oaf implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
OafEntity oafEntity = (OafEntity) o; OafEntity oafEntity = (OafEntity) o;
return Objects.equals(id, oafEntity.id) && return Objects.equals(id, oafEntity.id)
Objects.equals(originalId, oafEntity.originalId) && && Objects.equals(originalId, oafEntity.originalId)
Objects.equals(collectedfrom, oafEntity.collectedfrom) && && Objects.equals(collectedfrom, oafEntity.collectedfrom)
Objects.equals(pid, oafEntity.pid) && && Objects.equals(pid, oafEntity.pid)
Objects.equals(dateofcollection, oafEntity.dateofcollection) && && Objects.equals(dateofcollection, oafEntity.dateofcollection)
Objects.equals(dateoftransformation, oafEntity.dateoftransformation) && && Objects.equals(dateoftransformation, oafEntity.dateoftransformation)
Objects.equals(extraInfo, oafEntity.extraInfo) && && Objects.equals(extraInfo, oafEntity.extraInfo)
Objects.equals(oaiprovenance, oafEntity.oaiprovenance); && Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), id, originalId, collectedfrom, pid, dateofcollection, dateoftransformation, extraInfo, oaiprovenance); return Objects.hash(
super.hashCode(),
id,
originalId,
collectedfrom,
pid,
dateofcollection,
dateoftransformation,
extraInfo,
oaiprovenance);
} }
} }

View File

@ -122,7 +122,8 @@ public class Organization extends OafEntity implements Serializable {
return ecinternationalorganizationeurinterests; return ecinternationalorganizationeurinterests;
} }
public void setEcinternationalorganizationeurinterests(Field<String> ecinternationalorganizationeurinterests) { public void setEcinternationalorganizationeurinterests(
Field<String> ecinternationalorganizationeurinterests) {
this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests; this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests;
} }
@ -166,32 +167,70 @@ public class Organization extends OafEntity implements Serializable {
this.country = country; this.country = country;
} }
@Override @Override
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Organization.class.isAssignableFrom(e.getClass())){ if (!Organization.class.isAssignableFrom(e.getClass())) {
return; return;
} }
final Organization o = (Organization) e; final Organization o = (Organization) e;
legalshortname = o.getLegalshortname() != null && compareTrust(this, e)<0? o.getLegalshortname() : legalshortname; legalshortname =
legalname = o.getLegalname() != null && compareTrust(this, e)<0 ? o.getLegalname() : legalname; o.getLegalshortname() != null && compareTrust(this, e) < 0
? o.getLegalshortname()
: legalshortname;
legalname =
o.getLegalname() != null && compareTrust(this, e) < 0
? o.getLegalname()
: legalname;
alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames); alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames);
websiteurl = o.getWebsiteurl() != null && compareTrust(this, e)<0? o.getWebsiteurl() : websiteurl; websiteurl =
logourl = o.getLogourl() != null && compareTrust(this, e)<0? o.getLogourl() : logourl; o.getWebsiteurl() != null && compareTrust(this, e) < 0
eclegalbody = o.getEclegalbody() != null && compareTrust(this, e)<0? o.getEclegalbody() : eclegalbody; ? o.getWebsiteurl()
eclegalperson = o.getEclegalperson() != null && compareTrust(this, e)<0? o.getEclegalperson() : eclegalperson; : websiteurl;
ecnonprofit = o.getEcnonprofit() != null && compareTrust(this, e)<0? o.getEcnonprofit() : ecnonprofit; logourl = o.getLogourl() != null && compareTrust(this, e) < 0 ? o.getLogourl() : logourl;
ecresearchorganization = o.getEcresearchorganization() != null && compareTrust(this, e)<0? o.getEcresearchorganization() : ecresearchorganization; eclegalbody =
echighereducation = o.getEchighereducation() != null && compareTrust(this, e)<0? o.getEchighereducation() : echighereducation; o.getEclegalbody() != null && compareTrust(this, e) < 0
ecinternationalorganizationeurinterests = o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e)<0? o.getEcinternationalorganizationeurinterests() : ecinternationalorganizationeurinterests; ? o.getEclegalbody()
ecinternationalorganization = o.getEcinternationalorganization() != null && compareTrust(this, e)<0? o.getEcinternationalorganization() : ecinternationalorganization; : eclegalbody;
ecenterprise = o.getEcenterprise() != null && compareTrust(this, e)<0? o.getEcenterprise() :ecenterprise; eclegalperson =
ecsmevalidated = o.getEcsmevalidated() != null && compareTrust(this, e)<0? o.getEcsmevalidated() :ecsmevalidated; o.getEclegalperson() != null && compareTrust(this, e) < 0
ecnutscode = o.getEcnutscode() != null && compareTrust(this, e)<0? o.getEcnutscode() :ecnutscode; ? o.getEclegalperson()
country = o.getCountry() != null && compareTrust(this, e)<0 ? o.getCountry() :country; : eclegalperson;
ecnonprofit =
o.getEcnonprofit() != null && compareTrust(this, e) < 0
? o.getEcnonprofit()
: ecnonprofit;
ecresearchorganization =
o.getEcresearchorganization() != null && compareTrust(this, e) < 0
? o.getEcresearchorganization()
: ecresearchorganization;
echighereducation =
o.getEchighereducation() != null && compareTrust(this, e) < 0
? o.getEchighereducation()
: echighereducation;
ecinternationalorganizationeurinterests =
o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e) < 0
? o.getEcinternationalorganizationeurinterests()
: ecinternationalorganizationeurinterests;
ecinternationalorganization =
o.getEcinternationalorganization() != null && compareTrust(this, e) < 0
? o.getEcinternationalorganization()
: ecinternationalorganization;
ecenterprise =
o.getEcenterprise() != null && compareTrust(this, e) < 0
? o.getEcenterprise()
: ecenterprise;
ecsmevalidated =
o.getEcsmevalidated() != null && compareTrust(this, e) < 0
? o.getEcsmevalidated()
: ecsmevalidated;
ecnutscode =
o.getEcnutscode() != null && compareTrust(this, e) < 0
? o.getEcnutscode()
: ecnutscode;
country = o.getCountry() != null && compareTrust(this, e) < 0 ? o.getCountry() : country;
mergeOAFDataInfo(o); mergeOAFDataInfo(o);
} }
@ -201,26 +240,45 @@ public class Organization extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
Organization that = (Organization) o; Organization that = (Organization) o;
return Objects.equals(legalshortname, that.legalshortname) && return Objects.equals(legalshortname, that.legalshortname)
Objects.equals(legalname, that.legalname) && && Objects.equals(legalname, that.legalname)
Objects.equals(alternativeNames, that.alternativeNames) && && Objects.equals(alternativeNames, that.alternativeNames)
Objects.equals(websiteurl, that.websiteurl) && && Objects.equals(websiteurl, that.websiteurl)
Objects.equals(logourl, that.logourl) && && Objects.equals(logourl, that.logourl)
Objects.equals(eclegalbody, that.eclegalbody) && && Objects.equals(eclegalbody, that.eclegalbody)
Objects.equals(eclegalperson, that.eclegalperson) && && Objects.equals(eclegalperson, that.eclegalperson)
Objects.equals(ecnonprofit, that.ecnonprofit) && && Objects.equals(ecnonprofit, that.ecnonprofit)
Objects.equals(ecresearchorganization, that.ecresearchorganization) && && Objects.equals(ecresearchorganization, that.ecresearchorganization)
Objects.equals(echighereducation, that.echighereducation) && && Objects.equals(echighereducation, that.echighereducation)
Objects.equals(ecinternationalorganizationeurinterests, that.ecinternationalorganizationeurinterests) && && Objects.equals(
Objects.equals(ecinternationalorganization, that.ecinternationalorganization) && ecinternationalorganizationeurinterests,
Objects.equals(ecenterprise, that.ecenterprise) && that.ecinternationalorganizationeurinterests)
Objects.equals(ecsmevalidated, that.ecsmevalidated) && && Objects.equals(ecinternationalorganization, that.ecinternationalorganization)
Objects.equals(ecnutscode, that.ecnutscode) && && Objects.equals(ecenterprise, that.ecenterprise)
Objects.equals(country, that.country); && Objects.equals(ecsmevalidated, that.ecsmevalidated)
&& Objects.equals(ecnutscode, that.ecnutscode)
&& Objects.equals(country, that.country);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), legalshortname, legalname, alternativeNames, websiteurl, logourl, eclegalbody, eclegalperson, ecnonprofit, ecresearchorganization, echighereducation, ecinternationalorganizationeurinterests, ecinternationalorganization, ecenterprise, ecsmevalidated, ecnutscode, country); return Objects.hash(
super.hashCode(),
legalshortname,
legalname,
alternativeNames,
websiteurl,
logourl,
eclegalbody,
eclegalperson,
ecnonprofit,
ecresearchorganization,
echighereducation,
ecinternationalorganizationeurinterests,
ecinternationalorganization,
ecenterprise,
ecsmevalidated,
ecnutscode,
country);
} }
} }

View File

@ -70,16 +70,17 @@ public class OriginDescription implements Serializable {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
OriginDescription that = (OriginDescription) o; OriginDescription that = (OriginDescription) o;
return Objects.equals(harvestDate, that.harvestDate) && return Objects.equals(harvestDate, that.harvestDate)
Objects.equals(altered, that.altered) && && Objects.equals(altered, that.altered)
Objects.equals(baseURL, that.baseURL) && && Objects.equals(baseURL, that.baseURL)
Objects.equals(identifier, that.identifier) && && Objects.equals(identifier, that.identifier)
Objects.equals(datestamp, that.datestamp) && && Objects.equals(datestamp, that.datestamp)
Objects.equals(metadataNamespace, that.metadataNamespace); && Objects.equals(metadataNamespace, that.metadataNamespace);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace); return Objects.hash(
harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
} }
} }

View File

@ -40,11 +40,11 @@ public class OtherResearchProduct extends Result implements Serializable {
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())){ if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())) {
return; return;
} }
OtherResearchProduct o = (OtherResearchProduct)e; OtherResearchProduct o = (OtherResearchProduct) e;
contactperson = mergeLists(contactperson, o.getContactperson()); contactperson = mergeLists(contactperson, o.getContactperson());
contactgroup = mergeLists(contactgroup, o.getContactgroup()); contactgroup = mergeLists(contactgroup, o.getContactgroup());
@ -58,9 +58,9 @@ public class OtherResearchProduct extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
OtherResearchProduct that = (OtherResearchProduct) o; OtherResearchProduct that = (OtherResearchProduct) o;
return Objects.equals(contactperson, that.contactperson) && return Objects.equals(contactperson, that.contactperson)
Objects.equals(contactgroup, that.contactgroup) && && Objects.equals(contactgroup, that.contactgroup)
Objects.equals(tool, that.tool); && Objects.equals(tool, that.tool);
} }
@Override @Override

View File

@ -266,44 +266,91 @@ public class Project extends OafEntity implements Serializable {
this.fundedamount = fundedamount; this.fundedamount = fundedamount;
} }
@Override @Override
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Project.class.isAssignableFrom(e.getClass())){ if (!Project.class.isAssignableFrom(e.getClass())) {
return; return;
} }
Project p = (Project)e; Project p = (Project) e;
websiteurl= p.getWebsiteurl()!= null && compareTrust(this,e)<0?p.getWebsiteurl():websiteurl; websiteurl =
code= p.getCode()!=null && compareTrust(this,e)<0?p.getCode():code; p.getWebsiteurl() != null && compareTrust(this, e) < 0
acronym= p.getAcronym()!= null && compareTrust(this,e)<0?p.getAcronym():acronym; ? p.getWebsiteurl()
title= p.getTitle()!= null && compareTrust(this,e)<0?p.getTitle():title; : websiteurl;
startdate= p.getStartdate()!=null && compareTrust(this,e)<0?p.getStartdate():startdate; code = p.getCode() != null && compareTrust(this, e) < 0 ? p.getCode() : code;
enddate= p.getEnddate()!=null && compareTrust(this,e)<0?p.getEnddate():enddate; acronym = p.getAcronym() != null && compareTrust(this, e) < 0 ? p.getAcronym() : acronym;
callidentifier= p.getCallidentifier()!=null && compareTrust(this,e)<0?p.getCallidentifier():callidentifier; title = p.getTitle() != null && compareTrust(this, e) < 0 ? p.getTitle() : title;
keywords= p.getKeywords()!=null && compareTrust(this,e)<0?p.getKeywords():keywords; startdate =
duration= p.getDuration()!=null && compareTrust(this,e)<0?p.getDuration():duration; p.getStartdate() != null && compareTrust(this, e) < 0
ecsc39= p.getEcsc39()!=null && compareTrust(this,e)<0?p.getEcsc39():ecsc39; ? p.getStartdate()
oamandatepublications= p.getOamandatepublications()!=null && compareTrust(this,e)<0?p.getOamandatepublications():oamandatepublications; : startdate;
ecarticle29_3= p.getEcarticle29_3()!=null && compareTrust(this,e)<0?p.getEcarticle29_3():ecarticle29_3; enddate = p.getEnddate() != null && compareTrust(this, e) < 0 ? p.getEnddate() : enddate;
subjects= mergeLists(subjects, p.getSubjects()); callidentifier =
fundingtree= mergeLists(fundingtree, p.getFundingtree()); p.getCallidentifier() != null && compareTrust(this, e) < 0
contracttype= p.getContracttype()!=null && compareTrust(this,e)<0?p.getContracttype():contracttype; ? p.getCallidentifier()
optional1= p.getOptional1()!=null && compareTrust(this,e)<0?p.getOptional1():optional1; : callidentifier;
optional2= p.getOptional2()!=null && compareTrust(this,e)<0?p.getOptional2():optional2; keywords =
jsonextrainfo= p.getJsonextrainfo()!=null && compareTrust(this,e)<0?p.getJsonextrainfo():jsonextrainfo; p.getKeywords() != null && compareTrust(this, e) < 0 ? p.getKeywords() : keywords;
contactfullname= p.getContactfullname()!=null && compareTrust(this,e)<0?p.getContactfullname():contactfullname; duration =
contactfax= p.getContactfax()!=null && compareTrust(this,e)<0?p.getContactfax():contactfax; p.getDuration() != null && compareTrust(this, e) < 0 ? p.getDuration() : duration;
contactphone= p.getContactphone()!=null && compareTrust(this,e)<0?p.getContactphone():contactphone; ecsc39 = p.getEcsc39() != null && compareTrust(this, e) < 0 ? p.getEcsc39() : ecsc39;
contactemail= p.getContactemail()!=null && compareTrust(this,e)<0?p.getContactemail():contactemail; oamandatepublications =
summary= p.getSummary()!=null && compareTrust(this,e)<0?p.getSummary():summary; p.getOamandatepublications() != null && compareTrust(this, e) < 0
currency= p.getCurrency()!=null && compareTrust(this,e)<0?p.getCurrency():currency; ? p.getOamandatepublications()
totalcost= p.getTotalcost()!=null && compareTrust(this,e)<0?p.getTotalcost():totalcost; : oamandatepublications;
fundedamount= p.getFundedamount()!= null && compareTrust(this,e)<0?p.getFundedamount():fundedamount; ecarticle29_3 =
mergeOAFDataInfo(e); p.getEcarticle29_3() != null && compareTrust(this, e) < 0
? p.getEcarticle29_3()
: ecarticle29_3;
subjects = mergeLists(subjects, p.getSubjects());
fundingtree = mergeLists(fundingtree, p.getFundingtree());
contracttype =
p.getContracttype() != null && compareTrust(this, e) < 0
? p.getContracttype()
: contracttype;
optional1 =
p.getOptional1() != null && compareTrust(this, e) < 0
? p.getOptional1()
: optional1;
optional2 =
p.getOptional2() != null && compareTrust(this, e) < 0
? p.getOptional2()
: optional2;
jsonextrainfo =
p.getJsonextrainfo() != null && compareTrust(this, e) < 0
? p.getJsonextrainfo()
: jsonextrainfo;
contactfullname =
p.getContactfullname() != null && compareTrust(this, e) < 0
? p.getContactfullname()
: contactfullname;
contactfax =
p.getContactfax() != null && compareTrust(this, e) < 0
? p.getContactfax()
: contactfax;
contactphone =
p.getContactphone() != null && compareTrust(this, e) < 0
? p.getContactphone()
: contactphone;
contactemail =
p.getContactemail() != null && compareTrust(this, e) < 0
? p.getContactemail()
: contactemail;
summary = p.getSummary() != null && compareTrust(this, e) < 0 ? p.getSummary() : summary;
currency =
p.getCurrency() != null && compareTrust(this, e) < 0 ? p.getCurrency() : currency;
totalcost =
p.getTotalcost() != null && compareTrust(this, e) < 0
? p.getTotalcost()
: totalcost;
fundedamount =
p.getFundedamount() != null && compareTrust(this, e) < 0
? p.getFundedamount()
: fundedamount;
mergeOAFDataInfo(e);
} }
@Override @Override
@ -312,36 +359,63 @@ public class Project extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
Project project = (Project) o; Project project = (Project) o;
return Objects.equals(websiteurl, project.websiteurl) && return Objects.equals(websiteurl, project.websiteurl)
Objects.equals(code, project.code) && && Objects.equals(code, project.code)
Objects.equals(acronym, project.acronym) && && Objects.equals(acronym, project.acronym)
Objects.equals(title, project.title) && && Objects.equals(title, project.title)
Objects.equals(startdate, project.startdate) && && Objects.equals(startdate, project.startdate)
Objects.equals(enddate, project.enddate) && && Objects.equals(enddate, project.enddate)
Objects.equals(callidentifier, project.callidentifier) && && Objects.equals(callidentifier, project.callidentifier)
Objects.equals(keywords, project.keywords) && && Objects.equals(keywords, project.keywords)
Objects.equals(duration, project.duration) && && Objects.equals(duration, project.duration)
Objects.equals(ecsc39, project.ecsc39) && && Objects.equals(ecsc39, project.ecsc39)
Objects.equals(oamandatepublications, project.oamandatepublications) && && Objects.equals(oamandatepublications, project.oamandatepublications)
Objects.equals(ecarticle29_3, project.ecarticle29_3) && && Objects.equals(ecarticle29_3, project.ecarticle29_3)
Objects.equals(subjects, project.subjects) && && Objects.equals(subjects, project.subjects)
Objects.equals(fundingtree, project.fundingtree) && && Objects.equals(fundingtree, project.fundingtree)
Objects.equals(contracttype, project.contracttype) && && Objects.equals(contracttype, project.contracttype)
Objects.equals(optional1, project.optional1) && && Objects.equals(optional1, project.optional1)
Objects.equals(optional2, project.optional2) && && Objects.equals(optional2, project.optional2)
Objects.equals(jsonextrainfo, project.jsonextrainfo) && && Objects.equals(jsonextrainfo, project.jsonextrainfo)
Objects.equals(contactfullname, project.contactfullname) && && Objects.equals(contactfullname, project.contactfullname)
Objects.equals(contactfax, project.contactfax) && && Objects.equals(contactfax, project.contactfax)
Objects.equals(contactphone, project.contactphone) && && Objects.equals(contactphone, project.contactphone)
Objects.equals(contactemail, project.contactemail) && && Objects.equals(contactemail, project.contactemail)
Objects.equals(summary, project.summary) && && Objects.equals(summary, project.summary)
Objects.equals(currency, project.currency) && && Objects.equals(currency, project.currency)
Objects.equals(totalcost, project.totalcost) && && Objects.equals(totalcost, project.totalcost)
Objects.equals(fundedamount, project.fundedamount); && Objects.equals(fundedamount, project.fundedamount);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), websiteurl, code, acronym, title, startdate, enddate, callidentifier, keywords, duration, ecsc39, oamandatepublications, ecarticle29_3, subjects, fundingtree, contracttype, optional1, optional2, jsonextrainfo, contactfullname, contactfax, contactphone, contactemail, summary, currency, totalcost, fundedamount); return Objects.hash(
super.hashCode(),
websiteurl,
code,
acronym,
title,
startdate,
enddate,
callidentifier,
keywords,
duration,
ecsc39,
oamandatepublications,
ecarticle29_3,
subjects,
fundingtree,
contracttype,
optional1,
optional2,
jsonextrainfo,
contactfullname,
contactfax,
contactphone,
contactemail,
summary,
currency,
totalcost,
fundedamount);
} }
} }

View File

@ -20,14 +20,13 @@ public class Publication extends Result implements Serializable {
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Publication.class.isAssignableFrom(e.getClass())){ if (!Publication.class.isAssignableFrom(e.getClass())) {
return; return;
} }
Publication p = (Publication) e; Publication p = (Publication) e;
if (p.getJournal() != null && compareTrust(this, e)<0) if (p.getJournal() != null && compareTrust(this, e) < 0) journal = p.getJournal();
journal = p.getJournal();
mergeOAFDataInfo(e); mergeOAFDataInfo(e);
} }

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable; import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class Qualifier implements Serializable { public class Qualifier implements Serializable {
@ -45,20 +44,24 @@ public class Qualifier implements Serializable {
} }
public String toComparableString() { public String toComparableString() {
return isBlank()?"": String.format("%s::%s::%s::%s", return isBlank()
classid != null ? classid : "", ? ""
classname != null ? classname : "", : String.format(
schemeid != null ? schemeid : "", "%s::%s::%s::%s",
schemename != null ? schemename : ""); classid != null ? classid : "",
classname != null ? classname : "",
schemeid != null ? schemeid : "",
schemename != null ? schemename : "");
} }
@JsonIgnore @JsonIgnore
public boolean isBlank() { public boolean isBlank() {
return StringUtils.isBlank(classid) && return StringUtils.isBlank(classid)
StringUtils.isBlank(classname) && && StringUtils.isBlank(classname)
StringUtils.isBlank(schemeid) && && StringUtils.isBlank(schemeid)
StringUtils.isBlank(schemename); && StringUtils.isBlank(schemename);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return toComparableString().hashCode(); return toComparableString().hashCode();
@ -66,16 +69,12 @@ public class Qualifier implements Serializable {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
Qualifier other = (Qualifier) obj; Qualifier other = (Qualifier) obj;
return toComparableString() return toComparableString().equals(other.toComparableString());
.equals(other.toComparableString());
} }
} }

View File

@ -1,104 +1,98 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import static com.google.common.base.Preconditions.checkArgument;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import static com.google.common.base.Preconditions.checkArgument;
public class Relation extends Oaf { public class Relation extends Oaf {
private String relType; private String relType;
private String subRelType; private String subRelType;
private String relClass; private String relClass;
private String source; private String source;
private String target; private String target;
private List<KeyValue> collectedFrom = new ArrayList<>(); public String getRelType() {
return relType;
}
public String getRelType() { public void setRelType(final String relType) {
return relType; this.relType = relType;
} }
public void setRelType(final String relType) { public String getSubRelType() {
this.relType = relType; return subRelType;
} }
public String getSubRelType() { public void setSubRelType(final String subRelType) {
return subRelType; this.subRelType = subRelType;
} }
public void setSubRelType(final String subRelType) { public String getRelClass() {
this.subRelType = subRelType; return relClass;
} }
public String getRelClass() { public void setRelClass(final String relClass) {
return relClass; this.relClass = relClass;
} }
public void setRelClass(final String relClass) { public String getSource() {
this.relClass = relClass; return source;
} }
public String getSource() { public void setSource(final String source) {
return source; this.source = source;
} }
public void setSource(final String source) { public String getTarget() {
this.source = source; return target;
} }
public String getTarget() { public void setTarget(final String target) {
return target; this.target = target;
} }
public void setTarget(final String target) { public void mergeFrom(final Relation r) {
this.target = target;
}
public List<KeyValue> getCollectedFrom() { checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
return collectedFrom; checkArgument(Objects.equals(getTarget(), r.getTarget()), "target ids must be equal");
} checkArgument(Objects.equals(getRelType(), r.getRelType()), "relType(s) must be equal");
checkArgument(
Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");
public void setCollectedFrom(final List<KeyValue> collectedFrom) { setCollectedfrom(
this.collectedFrom = collectedFrom; Stream.concat(
} Optional.ofNullable(getCollectedfrom())
.map(Collection::stream)
.orElse(Stream.empty()),
Optional.ofNullable(r.getCollectedfrom())
.map(Collection::stream)
.orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals
.collect(Collectors.toList()));
}
public void mergeFrom(final Relation r) { @Override
public boolean equals(Object o) {
checkArgument(Objects.equals(getSource(), r.getSource()),"source ids must be equal"); if (this == o) return true;
checkArgument(Objects.equals(getTarget(), r.getTarget()),"target ids must be equal"); if (o == null || getClass() != o.getClass()) return false;
checkArgument(Objects.equals(getRelType(), r.getRelType()),"relType(s) must be equal"); Relation relation = (Relation) o;
checkArgument(Objects.equals(getSubRelType(), r.getSubRelType()),"subRelType(s) must be equal"); return relType.equals(relation.relType)
checkArgument(Objects.equals(getRelClass(), r.getRelClass()),"relClass(es) must be equal"); && subRelType.equals(relation.subRelType)
&& relClass.equals(relation.relClass)
setCollectedFrom( && source.equals(relation.source)
Stream && target.equals(relation.target);
.concat(Optional.ofNullable(getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()), }
Optional.ofNullable(r.getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals
.collect(Collectors.toList()));
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Relation relation = (Relation) o;
return relType.equals(relation.relType) &&
subRelType.equals(relation.subRelType) &&
relClass.equals(relation.relClass) &&
source.equals(relation.source) &&
target.equals(relation.target);
}
@Override
public int hashCode() {
return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom);
}
@Override
public int hashCode() {
return Objects.hash(relType, subRelType, relClass, source, target, collectedfrom);
}
} }

View File

@ -223,7 +223,7 @@ public class Result extends OafEntity implements Serializable {
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Result.class.isAssignableFrom(e.getClass())){ if (!Result.class.isAssignableFrom(e.getClass())) {
return; return;
} }
@ -231,11 +231,9 @@ public class Result extends OafEntity implements Serializable {
instance = mergeLists(instance, r.getInstance()); instance = mergeLists(instance, r.getInstance());
if (r.getResulttype() != null && compareTrust(this, r) < 0) if (r.getResulttype() != null && compareTrust(this, r) < 0) resulttype = r.getResulttype();
resulttype = r.getResulttype();
if (r.getLanguage() != null && compareTrust(this, r) < 0) if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage();
language = r.getLanguage();
country = mergeLists(country, r.getCountry()); country = mergeLists(country, r.getCountry());
@ -247,8 +245,7 @@ public class Result extends OafEntity implements Serializable {
description = longestLists(description, r.getDescription()); description = longestLists(description, r.getDescription());
if (r.getPublisher() != null && compareTrust(this, r) < 0) if (r.getPublisher() != null && compareTrust(this, r) < 0) publisher = r.getPublisher();
publisher = r.getPublisher();
if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0) if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0)
embargoenddate = r.getEmbargoenddate(); embargoenddate = r.getEmbargoenddate();
@ -261,8 +258,7 @@ public class Result extends OafEntity implements Serializable {
contributor = mergeLists(contributor, r.getContributor()); contributor = mergeLists(contributor, r.getContributor());
if (r.getResourcetype() != null) if (r.getResourcetype() != null) resourcetype = r.getResourcetype();
resourcetype = r.getResourcetype();
coverage = mergeLists(coverage, r.getCoverage()); coverage = mergeLists(coverage, r.getCoverage());
@ -271,13 +267,21 @@ public class Result extends OafEntity implements Serializable {
externalReference = mergeLists(externalReference, r.getExternalReference()); externalReference = mergeLists(externalReference, r.getExternalReference());
} }
private List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) { private List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
if (a == null || b == null) if (a == null || b == null) return a == null ? b : a;
return a == null ? b : a;
if (a.size() == b.size()) { if (a.size() == b.size()) {
int msa = a.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0); int msa =
int msb = b.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0); a.stream()
.filter(i -> i.getValue() != null)
.map(i -> i.getValue().length())
.max(Comparator.naturalOrder())
.orElse(0);
int msb =
b.stream()
.filter(i -> i.getValue() != null)
.map(i -> i.getValue().length())
.max(Comparator.naturalOrder())
.orElse(0);
return msa > msb ? a : b; return msa > msb ? a : b;
} }
return a.size() > b.size() ? a : b; return a.size() > b.size() ? a : b;
@ -289,31 +293,53 @@ public class Result extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
Result result = (Result) o; Result result = (Result) o;
return Objects.equals(author, result.author) && return Objects.equals(author, result.author)
Objects.equals(resulttype, result.resulttype) && && Objects.equals(resulttype, result.resulttype)
Objects.equals(language, result.language) && && Objects.equals(language, result.language)
Objects.equals(country, result.country) && && Objects.equals(country, result.country)
Objects.equals(subject, result.subject) && && Objects.equals(subject, result.subject)
Objects.equals(title, result.title) && && Objects.equals(title, result.title)
Objects.equals(relevantdate, result.relevantdate) && && Objects.equals(relevantdate, result.relevantdate)
Objects.equals(description, result.description) && && Objects.equals(description, result.description)
Objects.equals(dateofacceptance, result.dateofacceptance) && && Objects.equals(dateofacceptance, result.dateofacceptance)
Objects.equals(publisher, result.publisher) && && Objects.equals(publisher, result.publisher)
Objects.equals(embargoenddate, result.embargoenddate) && && Objects.equals(embargoenddate, result.embargoenddate)
Objects.equals(source, result.source) && && Objects.equals(source, result.source)
Objects.equals(fulltext, result.fulltext) && && Objects.equals(fulltext, result.fulltext)
Objects.equals(format, result.format) && && Objects.equals(format, result.format)
Objects.equals(contributor, result.contributor) && && Objects.equals(contributor, result.contributor)
Objects.equals(resourcetype, result.resourcetype) && && Objects.equals(resourcetype, result.resourcetype)
Objects.equals(coverage, result.coverage) && && Objects.equals(coverage, result.coverage)
Objects.equals(bestaccessright, result.bestaccessright) && && Objects.equals(bestaccessright, result.bestaccessright)
Objects.equals(context, result.context) && && Objects.equals(context, result.context)
Objects.equals(externalReference, result.externalReference) && && Objects.equals(externalReference, result.externalReference)
Objects.equals(instance, result.instance); && Objects.equals(instance, result.instance);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), author, resulttype, language, country, subject, title, relevantdate, description, dateofacceptance, publisher, embargoenddate, source, fulltext, format, contributor, resourcetype, coverage, bestaccessright, context, externalReference, instance); return Objects.hash(
super.hashCode(),
author,
resulttype,
language,
country,
subject,
title,
relevantdate,
description,
dateofacceptance,
publisher,
embargoenddate,
source,
fulltext,
format,
contributor,
resourcetype,
coverage,
bestaccessright,
context,
externalReference,
instance);
} }
} }

View File

@ -50,7 +50,7 @@ public class Software extends Result implements Serializable {
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
super.mergeFrom(e); super.mergeFrom(e);
if (!Software.class.isAssignableFrom(e.getClass())){ if (!Software.class.isAssignableFrom(e.getClass())) {
return; return;
} }
@ -59,9 +59,15 @@ public class Software extends Result implements Serializable {
license = mergeLists(license, s.getLicense()); license = mergeLists(license, s.getLicense());
codeRepositoryUrl = s.getCodeRepositoryUrl()!= null && compareTrust(this, s)<0?s.getCodeRepositoryUrl():codeRepositoryUrl; codeRepositoryUrl =
s.getCodeRepositoryUrl() != null && compareTrust(this, s) < 0
? s.getCodeRepositoryUrl()
: codeRepositoryUrl;
programmingLanguage= s.getProgrammingLanguage()!= null && compareTrust(this, s)<0?s.getProgrammingLanguage():programmingLanguage; programmingLanguage =
s.getProgrammingLanguage() != null && compareTrust(this, s) < 0
? s.getProgrammingLanguage()
: programmingLanguage;
mergeOAFDataInfo(e); mergeOAFDataInfo(e);
} }
@ -72,14 +78,19 @@ public class Software extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false; if (!super.equals(o)) return false;
Software software = (Software) o; Software software = (Software) o;
return Objects.equals(documentationUrl, software.documentationUrl) && return Objects.equals(documentationUrl, software.documentationUrl)
Objects.equals(license, software.license) && && Objects.equals(license, software.license)
Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl) && && Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl)
Objects.equals(programmingLanguage, software.programmingLanguage); && Objects.equals(programmingLanguage, software.programmingLanguage);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return Objects.hash(super.hashCode(), documentationUrl, license, codeRepositoryUrl, programmingLanguage); return Objects.hash(
super.hashCode(),
documentationUrl,
license,
codeRepositoryUrl,
programmingLanguage);
} }
} }

View File

@ -2,7 +2,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
public class StructuredProperty implements Serializable { public class StructuredProperty implements Serializable {
private String value; private String value;
@ -34,8 +34,8 @@ public class StructuredProperty implements Serializable {
this.dataInfo = dataInfo; this.dataInfo = dataInfo;
} }
public String toComparableString(){ public String toComparableString() {
return value != null ? value.toLowerCase() : ""; return value != null ? value.toLowerCase() : "";
} }
@Override @Override
@ -45,16 +45,12 @@ public class StructuredProperty implements Serializable {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (this == obj) if (this == obj) return true;
return true; if (obj == null) return false;
if (obj == null) if (getClass() != obj.getClass()) return false;
return false;
if (getClass() != obj.getClass())
return false;
StructuredProperty other = (StructuredProperty) obj; StructuredProperty other = (StructuredProperty) obj;
return toComparableString() return toComparableString().equals(other.toComparableString());
.equals(other.toComparableString());
} }
} }

View File

@ -2,12 +2,11 @@ package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils;
public class DLIDataset extends Dataset { public class DLIDataset extends Dataset {
@ -47,33 +46,45 @@ public class DLIDataset extends Dataset {
DLIDataset p = (DLIDataset) e; DLIDataset p = (DLIDataset) e;
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus)) if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
completionStatus = p.completionStatus; completionStatus = p.completionStatus;
if ("complete".equalsIgnoreCase(p.completionStatus)) if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom()); dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
} }
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) { private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>(); Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null) if (a != null)
a.forEach(p -> { a.forEach(
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { p -> {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) { if (p != null
result.put(p.getId(), p); && StringUtils.isNotBlank(p.getId())
} && result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId())) } else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p); result.put(p.getId(), p);
}); });
if (b != null) if (b != null)
b.forEach(p -> { b.forEach(
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { p -> {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) { if (p != null
result.put(p.getId(), p); && StringUtils.isNotBlank(p.getId())
} && result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId())) } else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p); result.put(p.getId(), p);
}); });
return new ArrayList<>(result.values()); return new ArrayList<>(result.values());
} }

View File

@ -2,9 +2,9 @@ package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import org.apache.commons.lang3.StringUtils;
public class DLIPublication extends Publication implements Serializable { public class DLIPublication extends Publication implements Serializable {
@ -44,33 +44,45 @@ public class DLIPublication extends Publication implements Serializable {
DLIPublication p = (DLIPublication) e; DLIPublication p = (DLIPublication) e;
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus)) if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
completionStatus = p.completionStatus; completionStatus = p.completionStatus;
if ("complete".equalsIgnoreCase(p.completionStatus)) if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom()); dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
} }
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) { private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>(); Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null) if (a != null)
a.forEach(p -> { a.forEach(
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { p -> {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) { if (p != null
result.put(p.getId(), p); && StringUtils.isNotBlank(p.getId())
} && result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId())) } else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p); result.put(p.getId(), p);
}); });
if (b != null) if (b != null)
b.forEach(p -> { b.forEach(
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { p -> {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) { if (p != null
result.put(p.getId(), p); && StringUtils.isNotBlank(p.getId())
} && result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId())) } else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p); result.put(p.getId(), p);
}); });
return new ArrayList<>(result.values()); return new ArrayList<>(result.values());
} }

View File

@ -1,15 +1,13 @@
package eu.dnetlib.dhp.schema.scholexplorer; package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils;
public class DLIUnknown extends Oaf implements Serializable { public class DLIUnknown extends Oaf implements Serializable {
@ -49,7 +47,6 @@ public class DLIUnknown extends Oaf implements Serializable {
this.id = id; this.id = id;
} }
public List<StructuredProperty> getPid() { public List<StructuredProperty> getPid() {
return pid; return pid;
} }
@ -75,33 +72,45 @@ public class DLIUnknown extends Oaf implements Serializable {
} }
public void mergeFrom(DLIUnknown p) { public void mergeFrom(DLIUnknown p) {
if ("complete".equalsIgnoreCase(p.completionStatus)) if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom()); dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
} }
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) { private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>(); Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null) if (a != null)
a.forEach(p -> { a.forEach(
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { p -> {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) { if (p != null
result.put(p.getId(), p); && StringUtils.isNotBlank(p.getId())
} && result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId())) } else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p); result.put(p.getId(), p);
}); });
if (b != null) if (b != null)
b.forEach(p -> { b.forEach(
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) { p -> {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) { if (p != null
result.put(p.getId(), p); && StringUtils.isNotBlank(p.getId())
} && result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId())) } else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p); result.put(p.getId(), p);
}); });
return new ArrayList<>(result.values()); return new ArrayList<>(result.values());
} }

View File

@ -10,7 +10,7 @@ public class ProvenaceInfo implements Serializable {
private String completionStatus; private String completionStatus;
private String collectionMode ="collected"; private String collectionMode = "collected";
public String getId() { public String getId() {
return id; return id;

View File

@ -1,18 +1,14 @@
package eu.dnetlib.dhp.schema.action; package eu.dnetlib.dhp.schema.action;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
/** import com.fasterxml.jackson.databind.ObjectMapper;
* @author claudio.atzori import eu.dnetlib.dhp.schema.oaf.Relation;
*/ import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
/** @author claudio.atzori */
public class AtomicActionTest { public class AtomicActionTest {
@Test @Test
@ -36,7 +32,5 @@ public class AtomicActionTest {
assertEquals(aa1.getClazz(), aa2.getClazz()); assertEquals(aa1.getClazz(), aa2.getClazz());
assertEquals(aa1.getPayload(), aa2.getPayload()); assertEquals(aa1.getPayload(), aa2.getPayload());
} }
} }

View File

@ -1,14 +1,14 @@
package eu.dnetlib.dhp.schema.common; package eu.dnetlib.dhp.schema.common;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Result;
import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class ModelSupportTest { public class ModelSupportTest {
@Nested @Nested

View File

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.schema.oaf; package eu.dnetlib.dhp.schema.oaf;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class MergeTest { public class MergeTest {
@ -19,7 +19,7 @@ public class MergeTest {
@Test @Test
public void mergeListsTest() { public void mergeListsTest() {
//string list merge test // string list merge test
List<String> a = Arrays.asList("a", "b", "c", "e"); List<String> a = Arrays.asList("a", "b", "c", "e");
List<String> b = Arrays.asList("a", "b", "c", "d"); List<String> b = Arrays.asList("a", "b", "c", "d");
List<String> c = null; List<String> c = null;
@ -44,7 +44,6 @@ public class MergeTest {
assertNotNull(a.getCollectedfrom()); assertNotNull(a.getCollectedfrom());
assertEquals(3, a.getCollectedfrom().size()); assertEquals(3, a.getCollectedfrom().size());
} }
@Test @Test
@ -60,7 +59,6 @@ public class MergeTest {
assertNotNull(a.getSubject()); assertNotNull(a.getSubject());
assertEquals(3, a.getSubject().size()); assertEquals(3, a.getSubject().size());
} }
private KeyValue setKV(final String key, final String value) { private KeyValue setKV(final String key, final String value) {
@ -73,7 +71,8 @@ public class MergeTest {
return k; return k;
} }
private StructuredProperty setSP(final String value, final String schema, final String classname) { private StructuredProperty setSP(
final String value, final String schema, final String classname) {
StructuredProperty s = new StructuredProperty(); StructuredProperty s = new StructuredProperty();
s.setValue(value); s.setValue(value);
Qualifier q = new Qualifier(); Qualifier q = new Qualifier();

View File

@ -6,48 +6,44 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature; import com.fasterxml.jackson.databind.SerializationFeature;
import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.junit.jupiter.api.Test;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import org.junit.jupiter.api.Test;
public class DLItest { public class DLItest {
@Test @Test
public void testMergePublication() throws JsonProcessingException { public void testMergePublication() throws JsonProcessingException {
DLIPublication a1 = new DLIPublication(); DLIPublication a1 = new DLIPublication();
a1.setPid(Arrays.asList( createSP("123456","pdb","dnet:pid_types"))); a1.setPid(Arrays.asList(createSP("123456", "pdb", "dnet:pid_types")));
a1.setTitle(Collections.singletonList(createSP("Un Titolo", "title", "dnetTitle"))); a1.setTitle(Collections.singletonList(createSP("Un Titolo", "title", "dnetTitle")));
a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd","Zenodo","complete"))); a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd", "Zenodo", "complete")));
a1.setCompletionStatus("complete"); a1.setCompletionStatus("complete");
DLIPublication a = new DLIPublication(); DLIPublication a = new DLIPublication();
a.setPid(Arrays.asList(createSP("10.11","doi","dnet:pid_types"), createSP("123456","pdb","dnet:pid_types"))); a.setPid(
Arrays.asList(
createSP("10.11", "doi", "dnet:pid_types"),
createSP("123456", "pdb", "dnet:pid_types")));
a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle"))); a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle")));
a.setDlicollectedfrom(Arrays.asList(createCollectedFrom("dct","datacite","complete"),createCollectedFrom("dct","datacite","incomplete"))); a.setDlicollectedfrom(
Arrays.asList(
createCollectedFrom("dct", "datacite", "complete"),
createCollectedFrom("dct", "datacite", "incomplete")));
a.setCompletionStatus("incomplete"); a.setCompletionStatus("incomplete");
a.mergeFrom(a1); a.mergeFrom(a1);
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(a)); System.out.println(mapper.writeValueAsString(a));
} }
@Test @Test
public void testDeserialization() throws IOException { public void testDeserialization() throws IOException {
final String json ="{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. 
Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. 
Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}"; final String json =
"{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. 
Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. 
Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
ObjectMapper mapper = new ObjectMapper(); ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@ -56,7 +52,8 @@ public class DLItest {
System.out.println(mapper.writeValueAsString(dliDataset)); System.out.println(mapper.writeValueAsString(dliDataset));
} }
private ProvenaceInfo createCollectedFrom(final String id, final String name, final String completionStatus) { private ProvenaceInfo createCollectedFrom(
final String id, final String name, final String completionStatus) {
ProvenaceInfo p = new ProvenaceInfo(); ProvenaceInfo p = new ProvenaceInfo();
p.setId(id); p.setId(id);
p.setName(name); p.setName(name);
@ -64,8 +61,8 @@ public class DLItest {
return p; return p;
} }
private StructuredProperty createSP(
private StructuredProperty createSP(final String value, final String className, final String schemeName) { final String value, final String className, final String schemeName) {
StructuredProperty p = new StructuredProperty(); StructuredProperty p = new StructuredProperty();
p.setValue(value); p.setValue(value);
Qualifier schema = new Qualifier(); Qualifier schema = new Qualifier();
@ -76,6 +73,4 @@ public class DLItest {
p.setQualifier(schema); p.setQualifier(schema);
return p; return p;
} }
} }

View File

@ -10,117 +10,130 @@ import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJo
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ISClient implements Serializable { public class ISClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); private static final Logger log =
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private ISLookUpService isLookup; private ISLookUpService isLookup;
public ISClient(String isLookupUrl) { public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
} }
public List<String> getLatestRawsetPaths(String setIds) { public List<String> getLatestRawsetPaths(String setIds) {
List<String> ids = Lists.newArrayList(Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR) List<String> ids =
.omitEmptyStrings() Lists.newArrayList(
.trimResults() Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
.split(setIds)); .omitEmptyStrings()
.trimResults()
.split(setIds));
return ids.stream() return ids.stream()
.map(id -> getSet(isLookup, id)) .map(id -> getSet(isLookup, id))
.map(as -> as.getPathToLatest()) .map(as -> as.getPathToLatest())
.collect(Collectors.toCollection(ArrayList::new)); .collect(Collectors.toCollection(ArrayList::new));
} }
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) { private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
final String q = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') " final String q =
+ "where $x//SET/@id = '" + setId + "' return $x"; "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
+ "where $x//SET/@id = '"
+ setId
+ "' return $x";
try { try {
final String basePath = getBasePathHDFS(isLookup); final String basePath = getBasePathHDFS(isLookup);
final String setProfile = isLookup.getResourceProfileByQuery(q); final String setProfile = isLookup.getResourceProfileByQuery(q);
return getActionManagerSet(basePath, setProfile); return getActionManagerSet(basePath, setProfile);
} catch (ISLookUpException | ActionManagerException e) { } catch (ISLookUpException | ActionManagerException e) {
throw new RuntimeException("Error accessing Sets, using query: " + q); throw new RuntimeException("Error accessing Sets, using query: " + q);
} }
} }
private ActionManagerSet getActionManagerSet(final String basePath, final String profile) throws ActionManagerException { private ActionManagerSet getActionManagerSet(final String basePath, final String profile)
final SAXReader reader = new SAXReader(); throws ActionManagerException {
final ActionManagerSet set = new ActionManagerSet(); final SAXReader reader = new SAXReader();
final ActionManagerSet set = new ActionManagerSet();
try { try {
final Document doc = reader.read(new StringReader(profile)); final Document doc = reader.read(new StringReader(profile));
set.setId(doc.valueOf("//SET/@id").trim()); set.setId(doc.valueOf("//SET/@id").trim());
set.setName(doc.valueOf("//SET").trim()); set.setName(doc.valueOf("//SET").trim());
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim())); set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim()));
set.setLatest(doc.valueOf("//RAW_SETS/LATEST/@id"), doc.valueOf("//RAW_SETS/LATEST/@creationDate"), doc.valueOf("//RAW_SETS/LATEST/@lastUpdate")); set.setLatest(
set.setDirectory(doc.valueOf("//SET/@directory")); doc.valueOf("//RAW_SETS/LATEST/@id"),
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED"); doc.valueOf("//RAW_SETS/LATEST/@creationDate"),
if (expiredNodes != null) { doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
for (int i = 0; i < expiredNodes.size(); i++) { set.setDirectory(doc.valueOf("//SET/@directory"));
Element ex = (Element) expiredNodes.get(i); final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
set.addExpired(ex.attributeValue("id"), ex.attributeValue("creationDate"), ex.attributeValue("lastUpdate")); if (expiredNodes != null) {
} for (int i = 0; i < expiredNodes.size(); i++) {
} Element ex = (Element) expiredNodes.get(i);
set.addExpired(
ex.attributeValue("id"),
ex.attributeValue("creationDate"),
ex.attributeValue("lastUpdate"));
}
}
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder();
sb.append(basePath); sb.append(basePath);
sb.append("/"); sb.append("/");
sb.append(doc.valueOf("//SET/@directory")); sb.append(doc.valueOf("//SET/@directory"));
sb.append("/"); sb.append("/");
sb.append(doc.valueOf("//RAW_SETS/LATEST/@id")); sb.append(doc.valueOf("//RAW_SETS/LATEST/@id"));
set.setPathToLatest(sb.toString()); set.setPathToLatest(sb.toString());
return set; return set;
} catch (Exception e) { } catch (Exception e) {
throw new ActionManagerException("Error creating set from profile: " + profile, e); throw new ActionManagerException("Error creating set from profile: " + profile, e);
} }
} }
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
return queryServiceProperty(isLookup, "basePath");
}
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName) throws ActionManagerException {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + "']/@value/string()";
log.debug("quering for service property: " + q);
try {
final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value);
} catch (ISLookUpException e) {
String msg = "Error accessing service profile, using query: " + q;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (NoSuchElementException e) {
String msg = "missing service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (IllegalArgumentException e) {
String msg = "found more than one service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
}
}
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
return queryServiceProperty(isLookup, "basePath");
}
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
throws ActionManagerException {
final String q =
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName
+ "']/@value/string()";
log.debug("quering for service property: " + q);
try {
final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value);
} catch (ISLookUpException e) {
String msg = "Error accessing service profile, using query: " + q;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (NoSuchElementException e) {
String msg = "missing service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (IllegalArgumentException e) {
String msg = "found more than one service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
}
}
} }

View File

@ -1,9 +1,15 @@
package eu.dnetlib.dhp.actionmanager.partition; package eu.dnetlib.dhp.actionmanager.partition;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.spark.sql.functions.*;
import eu.dnetlib.dhp.actionmanager.ISClient; import eu.dnetlib.dhp.actionmanager.ISClient;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob; import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -14,32 +20,27 @@ import org.apache.spark.sql.types.*;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.Arrays; /** Partitions given set of action sets by payload type. */
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.spark.sql.functions.*;
/**
* Partitions given set of action sets by payload type.
*/
public class PartitionActionSetsByPayloadTypeJob { public class PartitionActionSetsByPayloadTypeJob {
private static final Logger logger = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); private static final Logger logger =
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final StructType KV_SCHEMA = StructType$.MODULE$.apply( private static final StructType KV_SCHEMA =
Arrays.asList( StructType$.MODULE$.apply(
StructField$.MODULE$.apply("key", DataTypes.StringType, false, Metadata.empty()), Arrays.asList(
StructField$.MODULE$.apply("value", DataTypes.StringType, false, Metadata.empty()) StructField$.MODULE$.apply(
)); "key", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"value", DataTypes.StringType, false, Metadata.empty())));
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply( private static final StructType ATOMIC_ACTION_SCHEMA =
Arrays.asList( StructType$.MODULE$.apply(
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()), Arrays.asList(
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty()) StructField$.MODULE$.apply(
)); "clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"payload", DataTypes.StringType, false, Metadata.empty())));
private ISClient isClient; private ISClient isClient;
@ -47,20 +48,20 @@ public class PartitionActionSetsByPayloadTypeJob {
this.isClient = new ISClient(isLookupUrl); this.isClient = new ISClient(isLookupUrl);
} }
public PartitionActionSetsByPayloadTypeJob() { public PartitionActionSetsByPayloadTypeJob() {}
}
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils.toString( String jsonConfiguration =
PromoteActionPayloadForGraphTableJob.class IOUtils.toString(
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json")); PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args); parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional Boolean isSparkSessionManaged =
.ofNullable(parser.get("isSparkSessionManaged")) Optional.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf) .map(Boolean::valueOf)
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputActionSetIds = parser.get("inputActionSetIds"); String inputActionSetIds = parser.get("inputActionSetIds");
@ -72,7 +73,8 @@ public class PartitionActionSetsByPayloadTypeJob {
String isLookupUrl = parser.get("isLookupUrl"); String isLookupUrl = parser.get("isLookupUrl");
logger.info("isLookupUrl: {}", isLookupUrl); logger.info("isLookupUrl: {}", isLookupUrl);
new PartitionActionSetsByPayloadTypeJob(isLookupUrl).run(isSparkSessionManaged, inputActionSetIds, outputPath); new PartitionActionSetsByPayloadTypeJob(isLookupUrl)
.run(isSparkSessionManaged, inputActionSetIds, outputPath);
} }
protected void run(Boolean isSparkSessionManaged, String inputActionSetIds, String outputPath) { protected void run(Boolean isSparkSessionManaged, String inputActionSetIds, String outputPath) {
@ -83,51 +85,51 @@ public class PartitionActionSetsByPayloadTypeJob {
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
runWithSparkSession(conf, isSparkSessionManaged, runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputPath); removeOutputDir(spark, outputPath);
readAndWriteActionSetsFromPaths(spark, inputActionSetPaths, outputPath); readAndWriteActionSetsFromPaths(spark, inputActionSetPaths, outputPath);
}); });
} }
private static void removeOutputDir(SparkSession spark, private static void removeOutputDir(SparkSession spark, String path) {
String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
} }
private static void readAndWriteActionSetsFromPaths(SparkSession spark, private static void readAndWriteActionSetsFromPaths(
List<String> inputActionSetPaths, SparkSession spark, List<String> inputActionSetPaths, String outputPath) {
String outputPath) { inputActionSetPaths.stream()
inputActionSetPaths .filter(
.forEach(inputActionSetPath -> { path ->
Dataset<Row> actionDS = readActionSetFromPath(spark, inputActionSetPath); HdfsSupport.exists(
saveActions(actionDS, outputPath); path, spark.sparkContext().hadoopConfiguration()))
}); .forEach(
inputActionSetPath -> {
Dataset<Row> actionDS =
readActionSetFromPath(spark, inputActionSetPath);
saveActions(actionDS, outputPath);
});
} }
private static Dataset<Row> readActionSetFromPath(SparkSession spark, private static Dataset<Row> readActionSetFromPath(SparkSession spark, String path) {
String path) {
logger.info("Reading actions from path: {}", path); logger.info("Reading actions from path: {}", path);
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Row> rdd = sc JavaRDD<Row> rdd =
.sequenceFile(path, Text.class, Text.class) sc.sequenceFile(path, Text.class, Text.class)
.map(x -> RowFactory.create(x._1().toString(), x._2().toString())); .map(x -> RowFactory.create(x._1().toString(), x._2().toString()));
return spark.createDataFrame(rdd, KV_SCHEMA) return spark.createDataFrame(rdd, KV_SCHEMA)
.withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA)) .withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.select(expr("atomic_action.*")); .select(expr("atomic_action.*"));
} }
private static void saveActions(Dataset<Row> actionDS, private static void saveActions(Dataset<Row> actionDS, String path) {
String path) {
logger.info("Saving actions to path: {}", path); logger.info("Saving actions to path: {}", path);
actionDS actionDS.write().partitionBy("clazz").mode(SaveMode.Append).parquet(path);
.write()
.partitionBy("clazz")
.mode(SaveMode.Append)
.parquet(path);
} }
public ISClient getIsClient() { public ISClient getIsClient() {

View File

@ -1,41 +1,39 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity; import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;
import java.util.function.BiFunction; import java.util.function.BiFunction;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass; /** OAF model merging support. */
/**
* OAF model merging support.
*/
public class MergeAndGet { public class MergeAndGet {
/** Private constructor: instances cannot be created from outside this class. */
private MergeAndGet() {
    // no state to initialise
}
/** Strategy for merging OAF model objects. */
public enum Strategy {
    /** Use the OAF 'mergeFrom' method. */
    MERGE_FROM_AND_GET,

    /** Use the last update timestamp to return the newer instance. */
    SELECT_NEWER_AND_GET
}
/** /**
* Returns a function for merging OAF model objects. * Returns a function for merging OAF model objects.
* *
* @param strategy Strategy to be used to merge objects * @param strategy Strategy to be used to merge objects
* @param <G> Graph table type * @param <G> Graph table type
* @param <A> Action payload type * @param <A> Action payload type
* @return BiFunction to be used to merge OAF objects * @return BiFunction to be used to merge OAF objects
*/ */
public static <G extends Oaf, A extends Oaf> SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) { public static <G extends Oaf, A extends Oaf>
SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
switch (strategy) { switch (strategy) {
case MERGE_FROM_AND_GET: case MERGE_FROM_AND_GET:
return () -> MergeAndGet::mergeFromAndGet; return () -> MergeAndGet::mergeFromAndGet;
@ -49,26 +47,36 @@ public class MergeAndGet {
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) { if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
((Relation) x).mergeFrom((Relation) y); ((Relation) x).mergeFrom((Relation) y);
return x; return x;
} else if (isSubClass(x, OafEntity.class) && isSubClass(y, OafEntity.class) && isSubClass(x, y)) { } else if (isSubClass(x, OafEntity.class)
&& isSubClass(y, OafEntity.class)
&& isSubClass(x, y)) {
((OafEntity) x).mergeFrom((OafEntity) y); ((OafEntity) x).mergeFrom((OafEntity) y);
return x; return x;
} }
throw new RuntimeException(String.format("MERGE_FROM_AND_GET incompatible types: %s, %s", throw new RuntimeException(
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); String.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) { private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
return x; return x;
} else if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) { } else if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
return (G) y; return (G) y;
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { } else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
return x; return x;
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) { } else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s", throw new RuntimeException(
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); String.format(
"SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s", throw new RuntimeException(
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); String.format(
"SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
} }

View File

@ -1,14 +1,20 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -17,33 +23,25 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.util.Objects; /** Applies a given action payload file to graph table of compatible type. */
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* Applies a given action payload file to graph table of compatible type.
*/
public class PromoteActionPayloadForGraphTableJob { public class PromoteActionPayloadForGraphTableJob {
private static final Logger logger = LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class); private static final Logger logger =
LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils.toString( String jsonConfiguration =
PromoteActionPayloadForGraphTableJob.class IOUtils.toString(
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json")); PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args); parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional Boolean isSparkSessionManaged =
.ofNullable(parser.get("isSparkSessionManaged")) Optional.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf) .map(Boolean::valueOf)
.orElse(Boolean.TRUE); .orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged); logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputGraphTablePath = parser.get("inputGraphTablePath"); String inputGraphTablePath = parser.get("inputGraphTablePath");
@ -61,11 +59,13 @@ public class PromoteActionPayloadForGraphTableJob {
String outputGraphTablePath = parser.get("outputGraphTablePath"); String outputGraphTablePath = parser.get("outputGraphTablePath");
logger.info("outputGraphTablePath: {}", outputGraphTablePath); logger.info("outputGraphTablePath: {}", outputGraphTablePath);
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); MergeAndGet.Strategy strategy =
MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
logger.info("strategy: {}", strategy); logger.info("strategy: {}", strategy);
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName); Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName); Class<? extends Oaf> actionPayloadClazz =
(Class<? extends Oaf>) Class.forName(actionPayloadClassName);
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
@ -73,10 +73,13 @@ public class PromoteActionPayloadForGraphTableJob {
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses()); conf.registerKryoClasses(ModelSupport.getOafModelClasses());
runWithSparkSession(conf, isSparkSessionManaged, runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> { spark -> {
removeOutputDir(spark, outputGraphTablePath); removeOutputDir(spark, outputGraphTablePath);
promoteActionPayloadForGraphTable(spark, promoteActionPayloadForGraphTable(
spark,
inputGraphTablePath, inputGraphTablePath,
inputActionPayloadPath, inputActionPayloadPath,
outputGraphTablePath, outputGraphTablePath,
@ -86,44 +89,50 @@ public class PromoteActionPayloadForGraphTableJob {
}); });
} }
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(Class<? extends Oaf> rowClazz, private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(
Class<? extends Oaf> actionPayloadClazz) { Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) { if (!isSubClass(rowClazz, actionPayloadClazz)) {
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s", String msg =
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName()); String.format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
throw new RuntimeException(msg); throw new RuntimeException(msg);
} }
} }
private static void removeOutputDir(SparkSession spark, private static void removeOutputDir(SparkSession spark, String path) {
String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration()); HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
} }
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(SparkSession spark, private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(
String inputGraphTablePath, SparkSession spark,
String inputActionPayloadPath, String inputGraphTablePath,
String outputGraphTablePath, String inputActionPayloadPath,
MergeAndGet.Strategy strategy, String outputGraphTablePath,
Class<G> rowClazz, MergeAndGet.Strategy strategy,
Class<A> actionPayloadClazz) { Class<G> rowClazz,
Class<A> actionPayloadClazz) {
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz); Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz); Dataset<A> actionPayloadDS =
readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
Dataset<G> result = promoteActionPayloadForGraphTable(rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz) Dataset<G> result =
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz)); promoteActionPayloadForGraphTable(
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
saveGraphTable(result, outputGraphTablePath); saveGraphTable(result, outputGraphTablePath);
} }
private static <G extends Oaf> Dataset<G> readGraphTable(SparkSession spark, private static <G extends Oaf> Dataset<G> readGraphTable(
String path, SparkSession spark, String path, Class<G> rowClazz) {
Class<G> rowClazz) {
logger.info("Reading graph table from path: {}", path); logger.info("Reading graph table from path: {}", path);
return spark.read() return spark.read()
.textFile(path) .textFile(path)
.map((MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz), Encoders.bean(rowClazz)); .map(
(MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz),
Encoders.bean(rowClazz));
/* /*
return spark return spark
@ -133,33 +142,44 @@ public class PromoteActionPayloadForGraphTableJob {
*/ */
} }
private static <A extends Oaf> Dataset<A> readActionPayload(SparkSession spark, private static <A extends Oaf> Dataset<A> readActionPayload(
String path, SparkSession spark, String path, Class<A> actionPayloadClazz) {
Class<A> actionPayloadClazz) {
logger.info("Reading action payload from path: {}", path); logger.info("Reading action payload from path: {}", path);
return spark return spark.read()
.read()
.parquet(path) .parquet(path)
.map((MapFunction<Row, A>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"), .map(
actionPayloadClazz), Encoders.bean(actionPayloadClazz)); (MapFunction<Row, A>)
value ->
OBJECT_MAPPER.readValue(
value.<String>getAs("payload"), actionPayloadClazz),
Encoders.bean(actionPayloadClazz));
} }
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(Dataset<G> rowDS, private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
Dataset<A> actionPayloadDS, Dataset<G> rowDS,
MergeAndGet.Strategy strategy, Dataset<A> actionPayloadDS,
Class<G> rowClazz, MergeAndGet.Strategy strategy,
Class<A> actionPayloadClazz) { Class<G> rowClazz,
logger.info("Promoting action payload for graph table: payload={}, table={}", actionPayloadClazz.getSimpleName(), rowClazz.getSimpleName()); Class<A> actionPayloadClazz) {
logger.info(
"Promoting action payload for graph table: payload={}, table={}",
actionPayloadClazz.getSimpleName(),
rowClazz.getSimpleName());
SerializableSupplier<Function<G, String>> rowIdFn = PromoteActionPayloadForGraphTableJob::idFn; SerializableSupplier<Function<G, String>> rowIdFn =
SerializableSupplier<Function<A, String>> actionPayloadIdFn = PromoteActionPayloadForGraphTableJob::idFn; PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy); SerializableSupplier<Function<A, String>> actionPayloadIdFn =
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy); PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn =
MergeAndGet.functionFor(strategy);
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn =
MergeAndGet.functionFor(strategy);
SerializableSupplier<G> zeroFn = zeroFn(rowClazz); SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource; SerializableSupplier<Function<G, Boolean>> isNotZeroFn =
PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions Dataset<G> joinedAndMerged =
.joinGraphTableWithActionPayloadAndMerge( PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS, rowDS,
actionPayloadDS, actionPayloadDS,
rowIdFn, rowIdFn,
@ -168,14 +188,8 @@ public class PromoteActionPayloadForGraphTableJob {
rowClazz, rowClazz,
actionPayloadClazz); actionPayloadClazz);
return PromoteActionPayloadFunctions return PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
.groupGraphTableByIdAndMerge( joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
joinedAndMerged,
rowIdFn,
mergeRowsAndGetFn,
zeroFn,
isNotZeroFn,
rowClazz);
} }
private static <T extends Oaf> Function<T, String> idFn() { private static <T extends Oaf> Function<T, String> idFn() {
@ -190,19 +204,49 @@ public class PromoteActionPayloadForGraphTableJob {
private static <T extends Oaf> String idFnForRelation(T t) { private static <T extends Oaf> String idFnForRelation(T t) {
Relation r = (Relation) t; Relation r = (Relation) t;
return Optional.ofNullable(r.getSource()) return Optional.ofNullable(r.getSource())
.map(source -> Optional.ofNullable(r.getTarget()) .map(
.map(target -> Optional.ofNullable(r.getRelType()) source ->
.map(relType -> Optional.ofNullable(r.getSubRelType()) Optional.ofNullable(r.getTarget())
.map(subRelType -> Optional.ofNullable(r.getRelClass()) .map(
.map(relClass -> String.join(source, target, relType, subRelType, relClass)) target ->
.orElse(String.join(source, target, relType, subRelType)) Optional.ofNullable(r.getRelType())
) .map(
.orElse(String.join(source, target, relType)) relType ->
) Optional.ofNullable(
.orElse(String.join(source, target)) r
) .getSubRelType())
.orElse(source) .map(
) subRelType ->
Optional
.ofNullable(
r
.getRelClass())
.map(
relClass ->
String
.join(
source,
target,
relType,
subRelType,
relClass))
.orElse(
String
.join(
source,
target,
relType,
subRelType)))
.orElse(
String
.join(
source,
target,
relType)))
.orElse(
String.join(
source, target)))
.orElse(source))
.orElse(null); .orElse(null);
} }
@ -242,13 +286,8 @@ public class PromoteActionPayloadForGraphTableJob {
}; };
} }
private static <G extends Oaf> void saveGraphTable(Dataset<G> result, private static <G extends Oaf> void saveGraphTable(Dataset<G> result, String path) {
String path) {
logger.info("Saving graph table to path: {}", path); logger.info("Saving graph table to path: {}", path);
result result.toJSON().write().option("compression", "gzip").text(path);
.toJSON()
.write()
.option("compression", "gzip")
.text(path);
} }
} }

View File

@ -1,7 +1,13 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
@ -11,95 +17,113 @@ import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.expressions.Aggregator; import org.apache.spark.sql.expressions.Aggregator;
import scala.Tuple2; import scala.Tuple2;
import java.util.Objects; /** Promote action payload functions. */
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
/**
* Promote action payload functions.
*/
public class PromoteActionPayloadFunctions { public class PromoteActionPayloadFunctions {
/** Private constructor: instances cannot be created from outside this class. */
private PromoteActionPayloadFunctions() {
    // no state to initialise
}
/** /**
* Joins dataset representing graph table with dataset representing action payload using supplied functions. * Joins dataset representing graph table with dataset representing action payload using
* supplied functions.
* *
* @param rowDS Dataset representing graph table * @param rowDS Dataset representing graph table
* @param actionPayloadDS Dataset representing action payload * @param actionPayloadDS Dataset representing action payload
* @param rowIdFn Function used to get the id of graph table row * @param rowIdFn Function used to get the id of graph table row
* @param actionPayloadIdFn Function used to get id of action payload instance * @param actionPayloadIdFn Function used to get id of action payload instance
* @param mergeAndGetFn Function used to merge graph table row and action payload instance * @param mergeAndGetFn Function used to merge graph table row and action payload instance
* @param rowClazz Class of graph table * @param rowClazz Class of graph table
* @param actionPayloadClazz Class of action payload * @param actionPayloadClazz Class of action payload
* @param <G> Type of graph table row * @param <G> Type of graph table row
* @param <A> Type of action payload instance * @param <A> Type of action payload instance
* @return Dataset of merged graph table rows and action payload instances * @return Dataset of merged graph table rows and action payload instances
*/ */
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(Dataset<G> rowDS, public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(
Dataset<A> actionPayloadDS, Dataset<G> rowDS,
SerializableSupplier<Function<G, String>> rowIdFn, Dataset<A> actionPayloadDS,
SerializableSupplier<Function<A, String>> actionPayloadIdFn, SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn, SerializableSupplier<Function<A, String>> actionPayloadIdFn,
Class<G> rowClazz, SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
Class<A> actionPayloadClazz) { Class<G> rowClazz,
Class<A> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) { if (!isSubClass(rowClazz, actionPayloadClazz)) {
throw new RuntimeException("action payload type must be the same or be a super type of table row type"); throw new RuntimeException(
"action payload type must be the same or be a super type of table row type");
} }
Dataset<Tuple2<String, G>> rowWithIdDS = mapToTupleWithId(rowDS, rowIdFn, rowClazz); Dataset<Tuple2<String, G>> rowWithIdDS = mapToTupleWithId(rowDS, rowIdFn, rowClazz);
Dataset<Tuple2<String, A>> actionPayloadWithIdDS = mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz); Dataset<Tuple2<String, A>> actionPayloadWithIdDS =
mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
return rowWithIdDS return rowWithIdDS
.joinWith(actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), "full_outer") .joinWith(
.map((MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> { actionPayloadWithIdDS,
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2); rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
Optional<A> actionPayloadOpt = Optional.ofNullable(value._2()).map(Tuple2::_2); "full_outer")
return rowOpt .map(
.map(row -> actionPayloadOpt (MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>)
.map(actionPayload -> mergeAndGetFn.get().apply(row, actionPayload)) value -> {
.orElse(row)) Optional<G> rowOpt =
.orElseGet(() -> actionPayloadOpt Optional.ofNullable(value._1()).map(Tuple2::_2);
.filter(actionPayload -> actionPayload.getClass().equals(rowClazz)) Optional<A> actionPayloadOpt =
.map(rowClazz::cast) Optional.ofNullable(value._2()).map(Tuple2::_2);
.orElse(null)); return rowOpt.map(
}, Encoders.kryo(rowClazz)) row ->
actionPayloadOpt
.map(
actionPayload ->
mergeAndGetFn
.get()
.apply(
row,
actionPayload))
.orElse(row))
.orElseGet(
() ->
actionPayloadOpt
.filter(
actionPayload ->
actionPayload
.getClass()
.equals(
rowClazz))
.map(rowClazz::cast)
.orElse(null));
},
Encoders.kryo(rowClazz))
.filter((FilterFunction<G>) Objects::nonNull); .filter((FilterFunction<G>) Objects::nonNull);
} }
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(Dataset<T> ds, private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(
SerializableSupplier<Function<T, String>> idFn, Dataset<T> ds, SerializableSupplier<Function<T, String>> idFn, Class<T> clazz) {
Class<T> clazz) { return ds.map(
return ds (MapFunction<T, Tuple2<String, T>>)
.map((MapFunction<T, Tuple2<String, T>>) value -> new Tuple2<>(idFn.get().apply(value), value), value -> new Tuple2<>(idFn.get().apply(value), value),
Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
} }
/** /**
* Groups graph table by id and aggregates using supplied functions. * Groups graph table by id and aggregates using supplied functions.
* *
* @param rowDS Dataset representing graph table * @param rowDS Dataset representing graph table
* @param rowIdFn Function used to get the id of graph table row * @param rowIdFn Function used to get the id of graph table row
* @param mergeAndGetFn Function used to merge graph table rows * @param mergeAndGetFn Function used to merge graph table rows
* @param zeroFn Function to create a zero/empty instance of graph table row * @param zeroFn Function to create a zero/empty instance of graph table row
* @param isNotZeroFn Function to check if graph table row is not zero/empty * @param isNotZeroFn Function to check if graph table row is not zero/empty
* @param rowClazz Class of graph table * @param rowClazz Class of graph table
* @param <G> Type of graph table row * @param <G> Type of graph table row
* @return Dataset of aggregated graph table rows * @return Dataset of aggregated graph table rows
*/ */
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(Dataset<G> rowDS, public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(
SerializableSupplier<Function<G, String>> rowIdFn, Dataset<G> rowDS,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn, SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<G> zeroFn, SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn, SerializableSupplier<G> zeroFn,
Class<G> rowClazz) { SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn(); Class<G> rowClazz) {
return rowDS TypedColumn<G, G> aggregator =
.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING()) new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
return rowDS.groupByKey(
(MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
.agg(aggregator) .agg(aggregator)
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz)); .map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
} }
@ -115,10 +139,11 @@ public class PromoteActionPayloadFunctions {
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn; private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private Class<G> rowClazz; private Class<G> rowClazz;
public TableAggregator(SerializableSupplier<G> zeroFn, public TableAggregator(
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn, SerializableSupplier<G> zeroFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn, SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
Class<G> rowClazz) { SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
this.zeroFn = zeroFn; this.zeroFn = zeroFn;
this.mergeAndGetFn = mergeAndGetFn; this.mergeAndGetFn = mergeAndGetFn;
this.isNotZeroFn = isNotZeroFn; this.isNotZeroFn = isNotZeroFn;
@ -149,7 +174,8 @@ public class PromoteActionPayloadFunctions {
} else if (!isNotZero.apply(left) && isNotZero.apply(right)) { } else if (!isNotZero.apply(left) && isNotZero.apply(right)) {
return right; return right;
} }
throw new RuntimeException("internal aggregation error: left and right objects are zero"); throw new RuntimeException(
"internal aggregation error: left and right objects are zero");
} }
@Override @Override

View File

@ -1,10 +1,20 @@
package eu.dnetlib.dhp.actionmanager.partition; package eu.dnetlib.dhp.actionmanager.partition;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import static org.apache.spark.sql.functions.*;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.actionmanager.ISClient; import eu.dnetlib.dhp.actionmanager.ISClient;
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest; import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Job;
@ -25,32 +35,23 @@ import org.mockito.junit.jupiter.MockitoExtension;
import scala.Tuple2; import scala.Tuple2;
import scala.collection.mutable.Seq; import scala.collection.mutable.Seq;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import static org.apache.spark.sql.functions.*;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class PartitionActionSetsByPayloadTypeJobTest { public class PartitionActionSetsByPayloadTypeJobTest {
private static final ClassLoader cl = PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader(); private static final ClassLoader cl =
PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
private static Configuration configuration; private static Configuration configuration;
private static SparkSession spark; private static SparkSession spark;
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply( private static final StructType ATOMIC_ACTION_SCHEMA =
Arrays.asList( StructType$.MODULE$.apply(
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()), Arrays.asList(
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty()) StructField$.MODULE$.apply(
)); "clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"payload", DataTypes.StringType, false, Metadata.empty())));
@BeforeAll @BeforeAll
public static void beforeAll() throws IOException { public static void beforeAll() throws IOException {
@ -71,11 +72,11 @@ public class PartitionActionSetsByPayloadTypeJobTest {
@Nested @Nested
class Main { class Main {
@Mock @Mock private ISClient isClient;
private ISClient isClient;
@Test @Test
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir)
throws Exception {
// given // given
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
Path outputDir = workingDir.resolve("output"); Path outputDir = workingDir.resolve("output");
@ -85,15 +86,16 @@ public class PartitionActionSetsByPayloadTypeJobTest {
List<String> inputActionSetsPaths = resolveInputActionSetPaths(inputActionSetsBaseDir); List<String> inputActionSetsPaths = resolveInputActionSetPaths(inputActionSetsBaseDir);
// when // when
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString())).thenReturn(inputActionSetsPaths); Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString()))
.thenReturn(inputActionSetsPaths);
PartitionActionSetsByPayloadTypeJob job = new PartitionActionSetsByPayloadTypeJob(); PartitionActionSetsByPayloadTypeJob job = new PartitionActionSetsByPayloadTypeJob();
job.setIsClient(isClient); job.setIsClient(isClient);
job.run( job.run(
Boolean.FALSE, Boolean.FALSE,
"", // it can be empty we're mocking the response from isClient to resolve the paths "", // it can be empty we're mocking the response from isClient to resolve the
outputDir.toString() // paths
); outputDir.toString());
// then // then
Files.exists(outputDir); Files.exists(outputDir);
@ -110,112 +112,134 @@ public class PartitionActionSetsByPayloadTypeJobTest {
} }
} }
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir) throws IOException { private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir)
throws IOException {
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir(); Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
return Files return Files.list(inputActionSetJsonDumpsDir)
.list(inputActionSetJsonDumpsDir) .map(
.map(path -> { path -> {
String inputActionSetId = path.getFileName().toString(); String inputActionSetId = path.getFileName().toString();
return inputActionSetsBaseDir.resolve(inputActionSetId).toString(); return inputActionSetsBaseDir.resolve(inputActionSetId).toString();
}) })
.collect(Collectors.toCollection(ArrayList::new)); .collect(Collectors.toCollection(ArrayList::new));
} }
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir) throws IOException { private static Map<String, List<String>> createActionSets(Path inputActionSetsDir)
throws IOException {
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir(); Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
Map<String, List<String>> oafsByType = new HashMap<>(); Map<String, List<String>> oafsByType = new HashMap<>();
Files Files.list(inputActionSetJsonDumpsDir)
.list(inputActionSetJsonDumpsDir) .forEach(
.forEach(inputActionSetJsonDumpFile -> { inputActionSetJsonDumpFile -> {
String inputActionSetId = inputActionSetJsonDumpFile.getFileName().toString(); String inputActionSetId =
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId); inputActionSetJsonDumpFile.getFileName().toString();
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId);
Dataset<String> actionDS = readActionsFromJsonDump(inputActionSetJsonDumpFile.toString()) Dataset<String> actionDS =
.cache(); readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
.cache();
writeActionsAsJobInput(actionDS, inputActionSetId, inputActionSetDir.toString()); writeActionsAsJobInput(
actionDS, inputActionSetId, inputActionSetDir.toString());
Map<String, List<String>> actionSetOafsByType = actionDS Map<String, List<String>> actionSetOafsByType =
.withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA)) actionDS
.select(expr("atomic_action.*")) .withColumn(
.groupBy(col("clazz")) "atomic_action",
.agg(collect_list(col("payload")).as("payload_list")) from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.collectAsList() .select(expr("atomic_action.*")).groupBy(col("clazz"))
.stream() .agg(collect_list(col("payload")).as("payload_list"))
.map(row -> new AbstractMap.SimpleEntry<>(row.<String>getAs("clazz"), .collectAsList().stream()
mutableSeqAsJavaList(row.<Seq<String>>getAs("payload_list")))) .map(
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue)); row ->
new AbstractMap.SimpleEntry<>(
row.<String>getAs("clazz"),
mutableSeqAsJavaList(
row.<Seq<String>>getAs(
"payload_list"))))
.collect(
Collectors.toMap(
AbstractMap.SimpleEntry::getKey,
AbstractMap.SimpleEntry::getValue));
actionSetOafsByType.keySet() actionSetOafsByType
.forEach(x -> { .keySet()
if (oafsByType.containsKey(x)) { .forEach(
List<String> collected = new ArrayList<>(); x -> {
collected.addAll(oafsByType.get(x)); if (oafsByType.containsKey(x)) {
collected.addAll(actionSetOafsByType.get(x)); List<String> collected = new ArrayList<>();
oafsByType.put(x, collected); collected.addAll(oafsByType.get(x));
} else { collected.addAll(actionSetOafsByType.get(x));
oafsByType.put(x, actionSetOafsByType.get(x)); oafsByType.put(x, collected);
} } else {
}); oafsByType.put(x, actionSetOafsByType.get(x));
}); }
});
});
return oafsByType; return oafsByType;
} }
private static Path getInputActionSetJsonDumpsDir() { private static Path getInputActionSetJsonDumpsDir() {
return Paths return Paths.get(
.get(Objects.requireNonNull(cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/")) Objects.requireNonNull(
cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
.getFile()); .getFile());
} }
private static Dataset<String> readActionsFromJsonDump(String path) { private static Dataset<String> readActionsFromJsonDump(String path) {
return spark return spark.read().textFile(path);
.read()
.textFile(path);
} }
private static void writeActionsAsJobInput(Dataset<String> actionDS, private static void writeActionsAsJobInput(
String inputActionSetId, Dataset<String> actionDS, String inputActionSetId, String path) {
String path) { actionDS.javaRDD()
actionDS
.javaRDD()
.mapToPair(json -> new Tuple2<>(new Text(inputActionSetId), new Text(json))) .mapToPair(json -> new Tuple2<>(new Text(inputActionSetId), new Text(json)))
.saveAsNewAPIHadoopFile(path, .saveAsNewAPIHadoopFile(
path,
Text.class, Text.class,
Text.class, Text.class,
SequenceFileOutputFormat.class, SequenceFileOutputFormat.class,
configuration); configuration);
} }
private static <T extends Oaf> void assertForOafType(Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) { private static <T extends Oaf> void assertForOafType(
Path outputDatasetDir = outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName())); Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
Path outputDatasetDir =
outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
Files.exists(outputDatasetDir); Files.exists(outputDatasetDir);
List<T> actuals = readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList(); List<T> actuals =
readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
actuals.sort(Comparator.comparingInt(Object::hashCode)); actuals.sort(Comparator.comparingInt(Object::hashCode));
List<T> expecteds = oafsByClassName.get(clazz.getCanonicalName()).stream() List<T> expecteds =
.map(json -> mapToOaf(json, clazz)) oafsByClassName.get(clazz.getCanonicalName()).stream()
.sorted(Comparator.comparingInt(Object::hashCode)) .map(json -> mapToOaf(json, clazz))
.collect(Collectors.toList()); .sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
assertIterableEquals(expecteds, actuals); assertIterableEquals(expecteds, actuals);
} }
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(String path, private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(
Class<T> clazz) { String path, Class<T> clazz) {
return spark return spark.read()
.read()
.parquet(path) .parquet(path)
.map((MapFunction<Row, T>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"), clazz), .map(
(MapFunction<Row, T>)
value ->
OBJECT_MAPPER.readValue(
value.<String>getAs("payload"), clazz),
Encoders.bean(clazz)); Encoders.bean(clazz));
} }
private static <T extends Oaf> T mapToOaf(String json, Class<T> clazz) { private static <T extends Oaf> T mapToOaf(String json, Class<T> clazz) {
return rethrowAsRuntimeException( return rethrowAsRuntimeException(
() -> OBJECT_MAPPER.readValue(json, clazz), () -> OBJECT_MAPPER.readValue(json, clazz),
String.format("failed to map json to class: json=%s, class=%s", json, clazz.getCanonicalName()) String.format(
); "failed to map json to class: json=%s, class=%s",
json, clazz.getCanonicalName()));
} }
} }

View File

@ -1,17 +1,16 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.function.BiFunction;
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.Strategy; import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.Strategy;
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.functionFor; import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.functionFor;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*; import static org.mockito.Mockito.*;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.function.BiFunction;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
public class MergeAndGetTest { public class MergeAndGetTest {
@Nested @Nested
@ -24,11 +23,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -38,11 +37,11 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -52,11 +51,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -66,11 +65,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -80,11 +79,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -94,7 +93,8 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
Oaf x = fn.get().apply(a, b); Oaf x = fn.get().apply(a, b);
@ -110,11 +110,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -124,30 +124,28 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given // given
class OafEntitySub1 extends OafEntity { class OafEntitySub1 extends OafEntity {}
} class OafEntitySub2 extends OafEntity {}
class OafEntitySub2 extends OafEntity {
}
OafEntitySub1 a = mock(OafEntitySub1.class); OafEntitySub1 a = mock(OafEntitySub1.class);
OafEntitySub2 b = mock(OafEntitySub2.class); OafEntitySub2 b = mock(OafEntitySub2.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -157,7 +155,8 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then // then
Oaf x = fn.get().apply(a, b); Oaf x = fn.get().apply(a, b);
@ -177,11 +176,11 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -191,11 +190,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
@ -205,28 +204,29 @@ public class MergeAndGetTest {
Result b = mock(Result.class); Result b = mock(Result.class);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
// given // given
// real types must be used because subclass-superclass resolution does not work for mocks // real types must be used because subclass-superclass resolution does not work for
// mocks
Dataset a = new Dataset(); Dataset a = new Dataset();
a.setLastupdatetimestamp(1L); a.setLastupdatetimestamp(1L);
Result b = new Result(); Result b = new Result();
b.setLastupdatetimestamp(2L); b.setLastupdatetimestamp(2L);
// when // when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET); SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then // then
assertThrows(RuntimeException.class, () -> assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
fn.get().apply(a, b));
} }
@Test @Test

View File

@ -1,8 +1,20 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
@ -14,21 +26,9 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
public class PromoteActionPayloadForGraphTableJobTest { public class PromoteActionPayloadForGraphTableJobTest {
private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader(); private static final ClassLoader cl =
PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
private static SparkSession spark; private static SparkSession spark;
@ -52,7 +52,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
@BeforeEach @BeforeEach
public void beforeEach() throws IOException { public void beforeEach() throws IOException {
workingDir = Files.createTempDirectory(PromoteActionPayloadForGraphTableJobTest.class.getSimpleName()); workingDir =
Files.createTempDirectory(
PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
inputDir = workingDir.resolve("input"); inputDir = workingDir.resolve("input");
inputGraphRootDir = inputDir.resolve("graph"); inputGraphRootDir = inputDir.resolve("graph");
inputActionPayloadRootDir = inputDir.resolve("action_payload"); inputActionPayloadRootDir = inputDir.resolve("action_payload");
@ -80,87 +82,130 @@ public class PromoteActionPayloadForGraphTableJobTest {
Class<OafEntity> actionPayloadClazz = OafEntity.class; Class<OafEntity> actionPayloadClazz = OafEntity.class;
// when // when
RuntimeException exception = assertThrows(RuntimeException.class, () -> RuntimeException exception =
PromoteActionPayloadForGraphTableJob.main(new String[]{ assertThrows(
"-isSparkSessionManaged", Boolean.FALSE.toString(), RuntimeException.class,
"-inputGraphTablePath", "", () ->
"-graphTableClassName", rowClazz.getCanonicalName(), PromoteActionPayloadForGraphTableJob.main(
"-inputActionPayloadPath", "", new String[] {
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-outputGraphTablePath", "", "-inputGraphTablePath", "",
"-mergeAndGetStrategy", MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name() "-graphTableClassName", rowClazz.getCanonicalName(),
})); "-inputActionPayloadPath", "",
"-actionPayloadClassName",
actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath", "",
"-mergeAndGetStrategy",
MergeAndGet.Strategy.SELECT_NEWER_AND_GET
.name()
}));
// then // then
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s", String msg =
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName()); String.format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
assertTrue(exception.getMessage().contains(msg)); assertTrue(exception.getMessage().contains(msg));
} }
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") @MethodSource(
public void shouldPromoteActionPayloadForGraphTable(MergeAndGet.Strategy strategy, "eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
Class<? extends Oaf> rowClazz, public void shouldPromoteActionPayloadForGraphTable(
Class<? extends Oaf> actionPayloadClazz) throws Exception { MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz)
throws Exception {
// given // given
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz); Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz); Path inputActionPayloadDir =
Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase()); createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
Path outputGraphTableDir =
outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
// when // when
PromoteActionPayloadForGraphTableJob.main(new String[]{ PromoteActionPayloadForGraphTableJob.main(
"-isSparkSessionManaged", Boolean.FALSE.toString(), new String[] {
"-inputGraphTablePath", inputGraphTableDir.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-graphTableClassName", rowClazz.getCanonicalName(), "-inputGraphTablePath", inputGraphTableDir.toString(),
"-inputActionPayloadPath", inputActionPayloadDir.toString(), "-graphTableClassName", rowClazz.getCanonicalName(),
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(), "-inputActionPayloadPath", inputActionPayloadDir.toString(),
"-outputGraphTablePath", outputGraphTableDir.toString(), "-actionPayloadClassName", actionPayloadClazz.getCanonicalName(),
"-mergeAndGetStrategy", strategy.name() "-outputGraphTablePath", outputGraphTableDir.toString(),
}); "-mergeAndGetStrategy", strategy.name()
});
// then // then
assertTrue(Files.exists(outputGraphTableDir)); assertTrue(Files.exists(outputGraphTableDir));
List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz) List<? extends Oaf> actualOutputRows =
.collectAsList() readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
.stream() .collectAsList().stream()
.sorted(Comparator.comparingInt(Object::hashCode)) .sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList()); .collect(Collectors.toList());
String expectedOutputGraphTableJsonDumpPath = resultFileLocation(strategy, rowClazz, actionPayloadClazz); String expectedOutputGraphTableJsonDumpPath =
Path expectedOutputGraphTableJsonDumpFile = Paths resultFileLocation(strategy, rowClazz, actionPayloadClazz);
.get(Objects.requireNonNull(cl.getResource(expectedOutputGraphTableJsonDumpPath)).getFile()); Path expectedOutputGraphTableJsonDumpFile =
List<? extends Oaf> expectedOutputRows = readGraphTableFromJsonDump(expectedOutputGraphTableJsonDumpFile.toString(), rowClazz) Paths.get(
.collectAsList() Objects.requireNonNull(
.stream() cl.getResource(expectedOutputGraphTableJsonDumpPath))
.sorted(Comparator.comparingInt(Object::hashCode)) .getFile());
.collect(Collectors.toList()); List<? extends Oaf> expectedOutputRows =
readGraphTableFromJsonDump(
expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
.collectAsList().stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
assertIterableEquals(expectedOutputRows, actualOutputRows); assertIterableEquals(expectedOutputRows, actualOutputRows);
} }
} }
public static Stream<Arguments> promoteJobTestParams() { public static Stream<Arguments> promoteJobTestParams() {
return Stream.of( return Stream.of(
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Dataset.class), arguments(
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Result.class), MergeAndGet.Strategy.MERGE_FROM_AND_GET,
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Datasource.class, Datasource.class), eu.dnetlib.dhp.schema.oaf.Dataset.class,
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Organization.class, Organization.class), eu.dnetlib.dhp.schema.oaf.Dataset.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, OtherResearchProduct.class), arguments(
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, Result.class), MergeAndGet.Strategy.MERGE_FROM_AND_GET,
eu.dnetlib.dhp.schema.oaf.Dataset.class,
eu.dnetlib.dhp.schema.oaf.Result.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Datasource.class,
Datasource.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Organization.class,
Organization.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
OtherResearchProduct.class,
OtherResearchProduct.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
OtherResearchProduct.class,
Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Project.class, Project.class), arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Project.class, Project.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Publication.class), arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Publication.class,
Publication.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Result.class), arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Relation.class, Relation.class), arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Relation.class, Relation.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Software.class), arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Software.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class) arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class));
);
} }
private static <G extends Oaf> Path createGraphTable(Path inputGraphRootDir, private static <G extends Oaf> Path createGraphTable(
Class<G> rowClazz) { Path inputGraphRootDir, Class<G> rowClazz) {
String inputGraphTableJsonDumpPath = inputGraphTableJsonDumpLocation(rowClazz); String inputGraphTableJsonDumpPath = inputGraphTableJsonDumpLocation(rowClazz);
Path inputGraphTableJsonDumpFile = Paths Path inputGraphTableJsonDumpFile =
.get(Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath)).getFile()); Paths.get(
Dataset<G> rowDS = readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz); Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath))
.getFile());
Dataset<G> rowDS =
readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
String inputGraphTableName = rowClazz.getSimpleName().toLowerCase(); String inputGraphTableName = rowClazz.getSimpleName().toLowerCase();
Path inputGraphTableDir = inputGraphRootDir.resolve(inputGraphTableName); Path inputGraphTableDir = inputGraphRootDir.resolve(inputGraphTableName);
writeGraphTableAaJobInput(rowDS, inputGraphTableDir.toString()); writeGraphTableAaJobInput(rowDS, inputGraphTableDir.toString());
@ -169,71 +214,74 @@ public class PromoteActionPayloadForGraphTableJobTest {
private static String inputGraphTableJsonDumpLocation(Class<? extends Oaf> rowClazz) { private static String inputGraphTableJsonDumpLocation(Class<? extends Oaf> rowClazz) {
return String.format( return String.format(
"%s/%s.json", "eu/dnetlib/dhp/actionmanager/promote/input/graph", rowClazz.getSimpleName().toLowerCase()); "%s/%s.json",
"eu/dnetlib/dhp/actionmanager/promote/input/graph",
rowClazz.getSimpleName().toLowerCase());
} }
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(String path, private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(
Class<G> rowClazz) { String path, Class<G> rowClazz) {
return spark return spark.read()
.read()
.textFile(path) .textFile(path)
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz)); .map(
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
Encoders.bean(rowClazz));
} }
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS, private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS, String path) {
String path) { rowDS.write().option("compression", "gzip").json(path);
rowDS
.write()
.option("compression", "gzip")
.json(path);
} }
private static <G extends Oaf, A extends Oaf> Path createActionPayload(Path inputActionPayloadRootDir, private static <G extends Oaf, A extends Oaf> Path createActionPayload(
Class<G> rowClazz, Path inputActionPayloadRootDir, Class<G> rowClazz, Class<A> actionPayloadClazz) {
Class<A> actionPayloadClazz) { String inputActionPayloadJsonDumpPath =
String inputActionPayloadJsonDumpPath = inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz); inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
Path inputActionPayloadJsonDumpFile = Paths Path inputActionPayloadJsonDumpFile =
.get(Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath)).getFile()); Paths.get(
Dataset<String> actionPayloadDS = readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString()); Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath))
Path inputActionPayloadDir = inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase()); .getFile());
Dataset<String> actionPayloadDS =
readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
Path inputActionPayloadDir =
inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
writeActionPayloadAsJobInput(actionPayloadDS, inputActionPayloadDir.toString()); writeActionPayloadAsJobInput(actionPayloadDS, inputActionPayloadDir.toString());
return inputActionPayloadDir; return inputActionPayloadDir;
} }
private static String inputActionPayloadJsonDumpLocation(Class<? extends Oaf> rowClazz, private static String inputActionPayloadJsonDumpLocation(
Class<? extends Oaf> actionPayloadClazz) { Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
return String.format("eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json", return String.format(
rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase()); "eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
rowClazz.getSimpleName().toLowerCase(),
actionPayloadClazz.getSimpleName().toLowerCase());
} }
private static Dataset<String> readActionPayloadFromJsonDump(String path) { private static Dataset<String> readActionPayloadFromJsonDump(String path) {
return spark return spark.read().textFile(path);
.read()
.textFile(path);
} }
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS, private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS, String path) {
String path) { actionPayloadDS.withColumnRenamed("value", "payload").write().parquet(path);
actionPayloadDS
.withColumnRenamed("value", "payload")
.write()
.parquet(path);
} }
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(String path, private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(
Class<G> rowClazz) { String path, Class<G> rowClazz) {
return spark return spark.read()
.read()
.textFile(path) .textFile(path)
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz)); .map(
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
Encoders.bean(rowClazz));
} }
private static String resultFileLocation(MergeAndGet.Strategy strategy, private static String resultFileLocation(
Class<? extends Oaf> rowClazz, MergeAndGet.Strategy strategy,
Class<? extends Oaf> actionPayloadClazz) { Class<? extends Oaf> rowClazz,
return String Class<? extends Oaf> actionPayloadClazz) {
.format("eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json", return String.format(
strategy.name().toLowerCase(), rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase()); "eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
strategy.name().toLowerCase(),
rowClazz.getSimpleName().toLowerCase(),
actionPayloadClazz.getSimpleName().toLowerCase());
} }
} }

View File

@ -1,7 +1,15 @@
package eu.dnetlib.dhp.actionmanager.promote; package eu.dnetlib.dhp.actionmanager.promote;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier; import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
@ -11,15 +19,6 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
public class PromoteActionPayloadFunctionsTest { public class PromoteActionPayloadFunctionsTest {
private static SparkSession spark; private static SparkSession spark;
@ -44,20 +43,20 @@ public class PromoteActionPayloadFunctionsTest {
@Test @Test
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
// given // given
class OafImpl extends Oaf { class OafImpl extends Oaf {}
}
// when // when
assertThrows(RuntimeException.class, () -> assertThrows(
PromoteActionPayloadFunctions RuntimeException.class,
.joinGraphTableWithActionPayloadAndMerge(null, () ->
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
null,
null, null,
null, null,
null, null,
null, null,
OafImplSubSub.class, OafImplSubSub.class,
OafImpl.class OafImpl.class));
));
} }
@Test @Test
@ -68,40 +67,53 @@ public class PromoteActionPayloadFunctionsTest {
String id2 = "id2"; String id2 = "id2";
String id3 = "id3"; String id3 = "id3";
String id4 = "id4"; String id4 = "id4";
List<OafImplSubSub> rowData = Arrays.asList( List<OafImplSubSub> rowData =
createOafImplSubSub(id0), Arrays.asList(
createOafImplSubSub(id1), createOafImplSubSub(id0),
createOafImplSubSub(id2), createOafImplSubSub(id1),
createOafImplSubSub(id3) createOafImplSubSub(id2),
); createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class)); Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> actionPayloadData = Arrays.asList( List<OafImplSubSub> actionPayloadData =
createOafImplSubSub(id1), Arrays.asList(
createOafImplSubSub(id2), createOafImplSubSub(id2), createOafImplSubSub(id1),
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id2),
createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4) createOafImplSubSub(id2),
); createOafImplSubSub(id3),
Dataset<OafImplSubSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class)); createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id4),
createOafImplSubSub(id4),
createOafImplSubSub(id4),
createOafImplSubSub(id4));
Dataset<OafImplSubSub> actionPayloadDS =
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId; SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId; () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> { SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn =
x.merge(y); () -> OafImplRoot::getId;
return x; SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
}; mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
// when // when
List<OafImplSubSub> results = PromoteActionPayloadFunctions List<OafImplSubSub> results =
.joinGraphTableWithActionPayloadAndMerge(rowDS, PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
actionPayloadDS, rowDS,
rowIdFn, actionPayloadDS,
actionPayloadIdFn, rowIdFn,
mergeAndGetFn, actionPayloadIdFn,
OafImplSubSub.class, mergeAndGetFn,
OafImplSubSub.class OafImplSubSub.class,
) OafImplSubSub.class)
.collectAsList(); .collectAsList();
// then // then
assertEquals(11, results.size()); assertEquals(11, results.size());
@ -111,23 +123,24 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count()); assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
assertEquals(4, results.stream().filter(x -> x.getId().equals(id4)).count()); assertEquals(4, results.stream().filter(x -> x.getId().equals(id4)).count());
results.forEach(result -> { results.forEach(
switch (result.getId()) { result -> {
case "id0": switch (result.getId()) {
assertEquals(1, result.getMerged()); case "id0":
break; assertEquals(1, result.getMerged());
case "id1": break;
case "id2": case "id1":
case "id3": case "id2":
assertEquals(2, result.getMerged()); case "id3":
break; assertEquals(2, result.getMerged());
case "id4": break;
assertEquals(1, result.getMerged()); case "id4":
break; assertEquals(1, result.getMerged());
default: break;
throw new RuntimeException(); default:
} throw new RuntimeException();
}); }
});
} }
@Test @Test
@ -138,40 +151,53 @@ public class PromoteActionPayloadFunctionsTest {
String id2 = "id2"; String id2 = "id2";
String id3 = "id3"; String id3 = "id3";
String id4 = "id4"; String id4 = "id4";
List<OafImplSubSub> rowData = Arrays.asList( List<OafImplSubSub> rowData =
createOafImplSubSub(id0), Arrays.asList(
createOafImplSubSub(id1), createOafImplSubSub(id0),
createOafImplSubSub(id2), createOafImplSubSub(id1),
createOafImplSubSub(id3) createOafImplSubSub(id2),
); createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class)); Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSub> actionPayloadData = Arrays.asList( List<OafImplSub> actionPayloadData =
createOafImplSub(id1), Arrays.asList(
createOafImplSub(id2), createOafImplSub(id2), createOafImplSub(id1),
createOafImplSub(id3), createOafImplSub(id3), createOafImplSub(id3), createOafImplSub(id2),
createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4) createOafImplSub(id2),
); createOafImplSub(id3),
Dataset<OafImplSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class)); createOafImplSub(id3),
createOafImplSub(id3),
createOafImplSub(id4),
createOafImplSub(id4),
createOafImplSub(id4),
createOafImplSub(id4));
Dataset<OafImplSub> actionPayloadDS =
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId; SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId; () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> { SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn =
x.merge(y); () -> OafImplRoot::getId;
return x; SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>>
}; mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
// when // when
List<OafImplSubSub> results = PromoteActionPayloadFunctions List<OafImplSubSub> results =
.joinGraphTableWithActionPayloadAndMerge(rowDS, PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
actionPayloadDS, rowDS,
rowIdFn, actionPayloadDS,
actionPayloadIdFn, rowIdFn,
mergeAndGetFn, actionPayloadIdFn,
OafImplSubSub.class, mergeAndGetFn,
OafImplSub.class OafImplSubSub.class,
) OafImplSub.class)
.collectAsList(); .collectAsList();
// then // then
assertEquals(7, results.size()); assertEquals(7, results.size());
@ -181,22 +207,22 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count()); assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
assertEquals(0, results.stream().filter(x -> x.getId().equals(id4)).count()); assertEquals(0, results.stream().filter(x -> x.getId().equals(id4)).count());
results.forEach(result -> { results.forEach(
switch (result.getId()) { result -> {
case "id0": switch (result.getId()) {
assertEquals(1, result.getMerged()); case "id0":
break; assertEquals(1, result.getMerged());
case "id1": break;
case "id2": case "id1":
case "id3": case "id2":
assertEquals(2, result.getMerged()); case "id3":
break; assertEquals(2, result.getMerged());
default: break;
throw new RuntimeException(); default:
} throw new RuntimeException();
}); }
});
} }
} }
@Nested @Nested
@ -208,30 +234,40 @@ public class PromoteActionPayloadFunctionsTest {
String id1 = "id1"; String id1 = "id1";
String id2 = "id2"; String id2 = "id2";
String id3 = "id3"; String id3 = "id3";
List<OafImplSubSub> rowData = Arrays.asList( List<OafImplSubSub> rowData =
createOafImplSubSub(id1), Arrays.asList(
createOafImplSubSub(id2), createOafImplSubSub(id2), createOafImplSubSub(id1),
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3) createOafImplSubSub(id2),
); createOafImplSubSub(id2),
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class)); createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId; SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> { () -> OafImplRoot::getId;
x.merge(y); SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
return x; mergeAndGetFn =
}; () ->
(x, y) -> {
x.merge(y);
return x;
};
SerializableSupplier<OafImplSubSub> zeroFn = OafImplSubSub::new; SerializableSupplier<OafImplSubSub> zeroFn = OafImplSubSub::new;
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn = () -> x -> Objects.nonNull(x.getId()); SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn =
() -> x -> Objects.nonNull(x.getId());
// when // when
List<OafImplSubSub> results = PromoteActionPayloadFunctions List<OafImplSubSub> results =
.groupGraphTableByIdAndMerge(rowDS, PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
rowIdFn, rowDS,
mergeAndGetFn, rowIdFn,
zeroFn, mergeAndGetFn,
isNotZeroFn, zeroFn,
OafImplSubSub.class) isNotZeroFn,
.collectAsList(); OafImplSubSub.class)
.collectAsList();
// then // then
assertEquals(3, results.size()); assertEquals(3, results.size());
@ -239,23 +275,23 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(1, results.stream().filter(x -> x.getId().equals(id2)).count()); assertEquals(1, results.stream().filter(x -> x.getId().equals(id2)).count());
assertEquals(1, results.stream().filter(x -> x.getId().equals(id3)).count()); assertEquals(1, results.stream().filter(x -> x.getId().equals(id3)).count());
results.forEach(result -> { results.forEach(
switch (result.getId()) { result -> {
case "id1": switch (result.getId()) {
assertEquals(1, result.getMerged()); case "id1":
break; assertEquals(1, result.getMerged());
case "id2": break;
assertEquals(2, result.getMerged()); case "id2":
break; assertEquals(2, result.getMerged());
case "id3": break;
assertEquals(3, result.getMerged()); case "id3":
break; assertEquals(3, result.getMerged());
default: break;
throw new RuntimeException(); default:
} throw new RuntimeException();
}); }
});
} }
} }
public static class OafImplRoot extends Oaf { public static class OafImplRoot extends Oaf {
@ -310,5 +346,4 @@ public class PromoteActionPayloadFunctionsTest {
x.setId(id); x.setId(id);
return x; return x;
} }
} }

View File

@ -1,10 +1,10 @@
{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"} {"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"}

View File

@ -7,6 +7,11 @@ import eu.dnetlib.dhp.model.mdstore.Provenance;
import eu.dnetlib.message.Message; import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager; import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType; import eu.dnetlib.message.MessageType;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.cli.*; import org.apache.commons.cli.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -24,94 +29,135 @@ import org.dom4j.Document;
import org.dom4j.Node; import org.dom4j.Node;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
public class GenerateNativeStoreSparkJob { public class GenerateNativeStoreSparkJob {
public static MetadataRecord parseRecord(
final String input,
final String xpath,
final String encoding,
final Provenance provenance,
final Long dateOfCollection,
final LongAccumulator totalItems,
final LongAccumulator invalidRecords) {
public static MetadataRecord parseRecord (final String input, final String xpath, final String encoding, final Provenance provenance, final Long dateOfCollection, final LongAccumulator totalItems, final LongAccumulator invalidRecords) { if (totalItems != null) totalItems.add(1);
if(totalItems != null)
totalItems.add(1);
try { try {
SAXReader reader = new SAXReader(); SAXReader reader = new SAXReader();
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Document document =
reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Node node = document.selectSingleNode(xpath); Node node = document.selectSingleNode(xpath);
final String originalIdentifier = node.getText(); final String originalIdentifier = node.getText();
if (StringUtils.isBlank(originalIdentifier)) { if (StringUtils.isBlank(originalIdentifier)) {
if (invalidRecords!= null) if (invalidRecords != null) invalidRecords.add(1);
invalidRecords.add(1);
return null; return null;
} }
return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection); return new MetadataRecord(
originalIdentifier, encoding, provenance, input, dateOfCollection);
} catch (Throwable e) { } catch (Throwable e) {
if (invalidRecords!= null) if (invalidRecords != null) invalidRecords.add(1);
invalidRecords.add(1);
e.printStackTrace(); e.printStackTrace();
return null; return null;
} }
} }
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(GenerateNativeStoreSparkJob.class.getResourceAsStream("/eu/dnetlib/dhp/collection/collection_input_parameters.json"))); final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
GenerateNativeStoreSparkJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
parser.parseArgument(args); parser.parseArgument(args);
final ObjectMapper jsonMapper = new ObjectMapper(); final ObjectMapper jsonMapper = new ObjectMapper();
final Provenance provenance = jsonMapper.readValue(parser.get("provenance"), Provenance.class); final Provenance provenance =
final long dateOfCollection = new Long(parser.get("dateOfCollection")); jsonMapper.readValue(parser.get("provenance"), Provenance.class);
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
final SparkSession spark = SparkSession final SparkSession spark =
.builder() SparkSession.builder()
.appName("GenerateNativeStoreSparkJob") .appName("GenerateNativeStoreSparkJob")
.master(parser.get("master")) .master(parser.get("master"))
.getOrCreate(); .getOrCreate();
final Map<String, String> ongoingMap = new HashMap<>(); final Map<String, String> ongoingMap = new HashMap<>();
final Map<String, String> reportMap = new HashMap<>(); final Map<String, String> reportMap = new HashMap<>();
final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest")); final boolean test =
parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final JavaPairRDD<IntWritable, Text> inputRDD = sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class); final JavaPairRDD<IntWritable, Text> inputRDD =
sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems"); final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords"); final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
final MessageManager manager = new MessageManager(parser.get("rabbitHost"), parser.get("rabbitUser"), parser.get("rabbitPassword"), false, false, null); final MessageManager manager =
new MessageManager(
parser.get("rabbitHost"),
parser.get("rabbitUser"),
parser.get("rabbitPassword"),
false,
false,
null);
final JavaRDD<MetadataRecord> mappeRDD = inputRDD.map(item -> parseRecord(item._2().toString(), parser.get("xpath"), parser.get("encoding"), provenance, dateOfCollection, totalItems, invalidRecords)) final JavaRDD<MetadataRecord> mappeRDD =
.filter(Objects::nonNull).distinct(); inputRDD.map(
item ->
parseRecord(
item._2().toString(),
parser.get("xpath"),
parser.get("encoding"),
provenance,
dateOfCollection,
totalItems,
invalidRecords))
.filter(Objects::nonNull)
.distinct();
ongoingMap.put("ongoing", "0"); ongoingMap.put("ongoing", "0");
if (!test) { if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false); manager.sendMessage(
new Message(
parser.get("workflowId"),
"DataFrameCreation",
MessageType.ONGOING,
ongoingMap),
parser.get("rabbitOngoingQueue"),
true,
false);
} }
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class); final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder); final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords"); final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
mdStoreRecords.add(mdstore.count()); mdStoreRecords.add(mdstore.count());
ongoingMap.put("ongoing", ""+ totalItems.value()); ongoingMap.put("ongoing", "" + totalItems.value());
if (!test) { if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false); manager.sendMessage(
new Message(
parser.get("workflowId"),
"DataFrameCreation",
MessageType.ONGOING,
ongoingMap),
parser.get("rabbitOngoingQueue"),
true,
false);
} }
mdstore.write().format("parquet").save(parser.get("output")); mdstore.write().format("parquet").save(parser.get("output"));
reportMap.put("inputItem" , ""+ totalItems.value()); reportMap.put("inputItem", "" + totalItems.value());
reportMap.put("invalidRecords", "" + invalidRecords.value()); reportMap.put("invalidRecords", "" + invalidRecords.value());
reportMap.put("mdStoreSize", "" + mdStoreRecords.value()); reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
if (!test) { if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), parser.get("rabbitReportQueue"), true, false); manager.sendMessage(
new Message(
parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
parser.get("rabbitReportQueue"),
true,
false);
manager.close(); manager.close();
} }
} }
} }

View File

@ -2,10 +2,9 @@ package eu.dnetlib.dhp.collection.plugin;
import eu.dnetlib.collector.worker.model.ApiDescriptor; import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException; import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.stream.Stream; import java.util.stream.Stream;
public interface CollectorPlugin { public interface CollectorPlugin {
Stream<String> collect(ApiDescriptor api) throws DnetCollectorException; Stream<String> collect(ApiDescriptor api) throws DnetCollectorException;
} }

View File

@ -1,5 +1,11 @@
package eu.dnetlib.dhp.collection.plugin.oai; package eu.dnetlib.dhp.collection.plugin.oai;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -7,62 +13,70 @@ import java.util.Spliterator;
import java.util.Spliterators; import java.util.Spliterators;
import java.util.stream.Stream; import java.util.stream.Stream;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
public class OaiCollectorPlugin implements CollectorPlugin { public class OaiCollectorPlugin implements CollectorPlugin {
private static final String FORMAT_PARAM = "format"; private static final String FORMAT_PARAM = "format";
private static final String OAI_SET_PARAM = "set"; private static final String OAI_SET_PARAM = "set";
private static final Object OAI_FROM_DATE_PARAM = "fromDate"; private static final Object OAI_FROM_DATE_PARAM = "fromDate";
private static final Object OAI_UNTIL_DATE_PARAM = "untilDate"; private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
private OaiIteratorFactory oaiIteratorFactory;
private OaiIteratorFactory oaiIteratorFactory; @Override
public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException {
final String baseUrl = api.getBaseUrl();
final String mdFormat = api.getParams().get(FORMAT_PARAM);
final String setParam = api.getParams().get(OAI_SET_PARAM);
final String fromDate = api.getParams().get(OAI_FROM_DATE_PARAM);
final String untilDate = api.getParams().get(OAI_UNTIL_DATE_PARAM);
@Override final List<String> sets = new ArrayList<>();
public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException { if (setParam != null) {
final String baseUrl = api.getBaseUrl(); sets.addAll(
final String mdFormat = api.getParams().get(FORMAT_PARAM); Lists.newArrayList(
final String setParam = api.getParams().get(OAI_SET_PARAM); Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
final String fromDate = api.getParams().get(OAI_FROM_DATE_PARAM); }
final String untilDate = api.getParams().get(OAI_UNTIL_DATE_PARAM); if (sets.isEmpty()) {
// If no set is defined, ALL the sets must be harvested
sets.add("");
}
final List<String> sets = new ArrayList<>(); if (baseUrl == null || baseUrl.isEmpty()) {
if (setParam != null) { throw new DnetCollectorException("Param 'baseurl' is null or empty");
sets.addAll(Lists.newArrayList(Splitter.on(",").omitEmptyStrings().trimResults().split(setParam))); }
}
if (sets.isEmpty()) {
// If no set is defined, ALL the sets must be harvested
sets.add("");
}
if (baseUrl == null || baseUrl.isEmpty()) { throw new DnetCollectorException("Param 'baseurl' is null or empty"); } if (mdFormat == null || mdFormat.isEmpty()) {
throw new DnetCollectorException("Param 'mdFormat' is null or empty");
}
if (mdFormat == null || mdFormat.isEmpty()) { throw new DnetCollectorException("Param 'mdFormat' is null or empty"); } if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
}
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate); } if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
}
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate); } final Iterator<Iterator<String>> iters =
sets.stream()
.map(
set ->
getOaiIteratorFactory()
.newIterator(
baseUrl, mdFormat, set, fromDate,
untilDate))
.iterator();
final Iterator<Iterator<String>> iters = sets.stream() return StreamSupport.stream(
.map(set -> getOaiIteratorFactory().newIterator(baseUrl, mdFormat, set, fromDate, untilDate)) Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED),
.iterator(); false);
}
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED), false); public OaiIteratorFactory getOaiIteratorFactory() {
} if (oaiIteratorFactory == null) {
oaiIteratorFactory = new OaiIteratorFactory();
public OaiIteratorFactory getOaiIteratorFactory() { }
if (oaiIteratorFactory == null){ return oaiIteratorFactory;
oaiIteratorFactory = new OaiIteratorFactory(); }
}
return oaiIteratorFactory;
}
} }

View File

@ -1,15 +1,14 @@
package eu.dnetlib.dhp.collection.plugin.oai; package eu.dnetlib.dhp.collection.plugin.oai;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import java.io.StringReader; import java.io.StringReader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.util.Iterator; import java.util.Iterator;
import java.util.Queue; import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue; import java.util.concurrent.PriorityBlockingQueue;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -18,146 +17,163 @@ import org.dom4j.DocumentException;
import org.dom4j.Node; import org.dom4j.Node;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
public class OaiIterator implements Iterator<String> { public class OaiIterator implements Iterator<String> {
private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM private static final Log log =
LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
private final Queue<String> queue = new PriorityBlockingQueue<>(); private final Queue<String> queue = new PriorityBlockingQueue<>();
private final SAXReader reader = new SAXReader(); private final SAXReader reader = new SAXReader();
private final String baseUrl; private final String baseUrl;
private final String set; private final String set;
private final String mdFormat; private final String mdFormat;
private final String fromDate; private final String fromDate;
private final String untilDate; private final String untilDate;
private String token; private String token;
private boolean started; private boolean started;
private final HttpConnector httpConnector; private final HttpConnector httpConnector;
public OaiIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate, public OaiIterator(
final HttpConnector httpConnector) { final String baseUrl,
this.baseUrl = baseUrl; final String mdFormat,
this.mdFormat = mdFormat; final String set,
this.set = set; final String fromDate,
this.fromDate = fromDate; final String untilDate,
this.untilDate = untilDate; final HttpConnector httpConnector) {
this.started = false; this.baseUrl = baseUrl;
this.httpConnector = httpConnector; this.mdFormat = mdFormat;
} this.set = set;
this.fromDate = fromDate;
this.untilDate = untilDate;
this.started = false;
this.httpConnector = httpConnector;
}
private void verifyStarted() { private void verifyStarted() {
if (!this.started) { if (!this.started) {
this.started = true; this.started = true;
try { try {
this.token = firstPage(); this.token = firstPage();
} catch (final DnetCollectorException e) { } catch (final DnetCollectorException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
} }
@Override @Override
public boolean hasNext() { public boolean hasNext() {
synchronized (queue) { synchronized (queue) {
verifyStarted(); verifyStarted();
return !queue.isEmpty(); return !queue.isEmpty();
} }
} }
@Override @Override
public String next() { public String next() {
synchronized (queue) { synchronized (queue) {
verifyStarted(); verifyStarted();
final String res = queue.poll(); final String res = queue.poll();
while (queue.isEmpty() && token != null && !token.isEmpty()) { while (queue.isEmpty() && token != null && !token.isEmpty()) {
try { try {
token = otherPages(token); token = otherPages(token);
} catch (final DnetCollectorException e) { } catch (final DnetCollectorException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
return res; return res;
} }
} }
@Override @Override
public void remove() {} public void remove() {}
private String firstPage() throws DnetCollectorException { private String firstPage() throws DnetCollectorException {
try { try {
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); String url =
if (set != null && !set.isEmpty()) { baseUrl
url += "&set=" + URLEncoder.encode(set, "UTF-8"); + "?verb=ListRecords&metadataPrefix="
} + URLEncoder.encode(mdFormat, "UTF-8");
if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { if (set != null && !set.isEmpty()) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); url += "&set=" + URLEncoder.encode(set, "UTF-8");
} }
if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
} }
log.info("Start harvesting using url: " + url); if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
}
log.info("Start harvesting using url: " + url);
return downloadPage(url); return downloadPage(url);
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e); throw new DnetCollectorException(e);
} }
} }
private String extractResumptionToken(final String xml) { private String extractResumptionToken(final String xml) {
final String s = StringUtils.substringAfter(xml, "<resumptionToken"); final String s = StringUtils.substringAfter(xml, "<resumptionToken");
if (s == null) { return null; } if (s == null) {
return null;
}
final String result = StringUtils.substringBetween(s, ">", "</"); final String result = StringUtils.substringBetween(s, ">", "</");
if (result == null) { return null; } if (result == null) {
return result.trim(); return null;
}
return result.trim();
}
} private String otherPages(final String resumptionToken) throws DnetCollectorException {
try {
return downloadPage(
baseUrl
+ "?verb=ListRecords&resumptionToken="
+ URLEncoder.encode(resumptionToken, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
}
private String otherPages(final String resumptionToken) throws DnetCollectorException { private String downloadPage(final String url) throws DnetCollectorException {
try {
return downloadPage(baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
}
private String downloadPage(final String url) throws DnetCollectorException { final String xml = httpConnector.getInputSource(url);
Document doc;
try {
doc = reader.read(new StringReader(xml));
} catch (final DocumentException e) {
log.warn("Error parsing xml, I try to clean it: " + xml, e);
final String cleaned = XmlCleaner.cleanAllEntities(xml);
try {
doc = reader.read(new StringReader(cleaned));
} catch (final DocumentException e1) {
final String resumptionToken = extractResumptionToken(xml);
if (resumptionToken == null) {
throw new DnetCollectorException(
"Error parsing cleaned document:" + cleaned, e1);
}
return resumptionToken;
}
}
final String xml = httpConnector.getInputSource(url); final Node errorNode =
Document doc; doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
try { if (errorNode != null) {
doc = reader.read(new StringReader(xml)); final String code = errorNode.valueOf("@code");
} catch (final DocumentException e) { if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
log.warn("Error parsing xml, I try to clean it: " + xml, e); log.warn("noRecordsMatch for oai call: " + url);
final String cleaned = XmlCleaner.cleanAllEntities(xml); return null;
try { } else {
doc = reader.read(new StringReader(cleaned)); throw new DnetCollectorException(code + " - " + errorNode.getText());
} catch (final DocumentException e1) { }
final String resumptionToken = extractResumptionToken(xml); }
if (resumptionToken == null) { throw new DnetCollectorException("Error parsing cleaned document:" + cleaned, e1); }
return resumptionToken;
}
}
final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']"); for (final Object o :
if (errorNode != null) { doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
final String code = errorNode.valueOf("@code"); queue.add(((Node) o).asXML());
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) { }
log.warn("noRecordsMatch for oai call: " + url);
return null;
} else {
throw new DnetCollectorException(code + " - " + errorNode.getText());
}
}
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
queue.add(((Node) o).asXML());
}
return doc.valueOf("//*[local-name()='resumptionToken']");
}
return doc.valueOf("//*[local-name()='resumptionToken']");
}
} }

View File

@ -1,25 +1,23 @@
package eu.dnetlib.dhp.collection.plugin.oai; package eu.dnetlib.dhp.collection.plugin.oai;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector; import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import java.util.Iterator; import java.util.Iterator;
public class OaiIteratorFactory { public class OaiIteratorFactory {
private HttpConnector httpConnector;
private HttpConnector httpConnector; public Iterator<String> newIterator(
final String baseUrl,
public Iterator<String> newIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate) { final String mdFormat,
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector()); final String set,
} final String fromDate,
final String untilDate) {
private HttpConnector getHttpConnector() { return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
if (httpConnector== null) }
httpConnector = new HttpConnector();
return httpConnector;
}
private HttpConnector getHttpConnector() {
if (httpConnector == null) httpConnector = new HttpConnector();
return httpConnector;
}
} }

View File

@ -2,29 +2,30 @@ package eu.dnetlib.dhp.collection.worker;
public class DnetCollectorException extends Exception { public class DnetCollectorException extends Exception {
/** /** */
* private static final long serialVersionUID = -290723075076039757L;
*/
private static final long serialVersionUID = -290723075076039757L;
public DnetCollectorException() { public DnetCollectorException() {
super(); super();
} }
public DnetCollectorException(final String message, final Throwable cause, final boolean enableSuppression, final boolean writableStackTrace) { public DnetCollectorException(
super(message, cause, enableSuppression, writableStackTrace); final String message,
} final Throwable cause,
final boolean enableSuppression,
final boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
public DnetCollectorException(final String message, final Throwable cause) { public DnetCollectorException(final String message, final Throwable cause) {
super(message, cause); super(message, cause);
} }
public DnetCollectorException(final String message) { public DnetCollectorException(final String message) {
super(message); super(message);
} }
public DnetCollectorException(final Throwable cause) {
super(cause);
}
public DnetCollectorException(final Throwable cause) {
super(cause);
}
} }

View File

@ -2,13 +2,17 @@ package eu.dnetlib.dhp.collection.worker;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.collector.worker.model.ApiDescriptor; import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.Message; import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager; import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType; import eu.dnetlib.message.MessageType;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@ -18,37 +22,34 @@ import org.apache.hadoop.io.Text;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
public class DnetCollectorWorker { public class DnetCollectorWorker {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class); private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
private final CollectorPluginFactory collectorPluginFactory; private final CollectorPluginFactory collectorPluginFactory;
private final ArgumentApplicationParser argumentParser; private final ArgumentApplicationParser argumentParser;
private final MessageManager manager; private final MessageManager manager;
public DnetCollectorWorker(
public DnetCollectorWorker(final CollectorPluginFactory collectorPluginFactory, final ArgumentApplicationParser argumentParser, final MessageManager manager) throws DnetCollectorException { final CollectorPluginFactory collectorPluginFactory,
final ArgumentApplicationParser argumentParser,
final MessageManager manager)
throws DnetCollectorException {
this.collectorPluginFactory = collectorPluginFactory; this.collectorPluginFactory = collectorPluginFactory;
this.argumentParser = argumentParser; this.argumentParser = argumentParser;
this.manager = manager; this.manager = manager;
} }
public void collect() throws DnetCollectorException { public void collect() throws DnetCollectorException {
try { try {
final ObjectMapper jsonMapper = new ObjectMapper(); final ObjectMapper jsonMapper = new ObjectMapper();
final ApiDescriptor api = jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class); final ApiDescriptor api =
jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol()); final CollectorPlugin plugin =
collectorPluginFactory.getPluginByProtocol(api.getProtocol());
final String hdfsuri = argumentParser.get("namenode"); final String hdfsuri = argumentParser.get("namenode");
@ -62,7 +63,7 @@ public class DnetCollectorWorker {
System.setProperty("HADOOP_USER_NAME", argumentParser.get("userHDFS")); System.setProperty("HADOOP_USER_NAME", argumentParser.get("userHDFS"));
System.setProperty("hadoop.home.dir", "/"); System.setProperty("hadoop.home.dir", "/");
//Get the filesystem - HDFS // Get the filesystem - HDFS
FileSystem.get(URI.create(hdfsuri), conf); FileSystem.get(URI.create(hdfsuri), conf);
Path hdfswritepath = new Path(argumentParser.get("hdfsPath")); Path hdfswritepath = new Path(argumentParser.get("hdfsPath"));
@ -71,43 +72,69 @@ public class DnetCollectorWorker {
final Map<String, String> ongoingMap = new HashMap<>(); final Map<String, String> ongoingMap = new HashMap<>();
final Map<String, String> reportMap = new HashMap<>(); final Map<String, String> reportMap = new HashMap<>();
final AtomicInteger counter = new AtomicInteger(0); final AtomicInteger counter = new AtomicInteger(0);
try (SequenceFile.Writer writer = SequenceFile.createWriter(conf, try (SequenceFile.Writer writer =
SequenceFile.Writer.file(hdfswritepath), SequenceFile.Writer.keyClass(IntWritable.class), SequenceFile.createWriter(
SequenceFile.Writer.valueClass(Text.class))) { conf,
SequenceFile.Writer.file(hdfswritepath),
SequenceFile.Writer.keyClass(IntWritable.class),
SequenceFile.Writer.valueClass(Text.class))) {
final IntWritable key = new IntWritable(counter.get()); final IntWritable key = new IntWritable(counter.get());
final Text value = new Text(); final Text value = new Text();
plugin.collect(api).forEach(content -> { plugin.collect(api)
.forEach(
key.set(counter.getAndIncrement()); content -> {
value.set(content); key.set(counter.getAndIncrement());
if (counter.get() % 10 == 0) { value.set(content);
try { if (counter.get() % 10 == 0) {
ongoingMap.put("ongoing", "" + counter.get()); try {
log.debug("Sending message: "+ manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.get("rabbitOngoingQueue"), true, false)); ongoingMap.put("ongoing", "" + counter.get());
} catch (Exception e) { log.debug(
log.error("Error on sending message ", e); "Sending message: "
} + manager.sendMessage(
} new Message(
try { argumentParser.get(
writer.append(key, value); "workflowId"),
} catch (IOException e) { "Collection",
throw new RuntimeException(e); MessageType.ONGOING,
} ongoingMap),
argumentParser.get(
}); "rabbitOngoingQueue"),
true,
false));
} catch (Exception e) {
log.error("Error on sending message ", e);
}
}
try {
writer.append(key, value);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
} }
ongoingMap.put("ongoing", "" + counter.get()); ongoingMap.put("ongoing", "" + counter.get());
manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.get("rabbitOngoingQueue"), true, false); manager.sendMessage(
new Message(
argumentParser.get("workflowId"),
"Collection",
MessageType.ONGOING,
ongoingMap),
argumentParser.get("rabbitOngoingQueue"),
true,
false);
reportMap.put("collected", "" + counter.get()); reportMap.put("collected", "" + counter.get());
manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), argumentParser.get("rabbitOngoingQueue"), true, false); manager.sendMessage(
new Message(
argumentParser.get("workflowId"),
"Collection",
MessageType.REPORT,
reportMap),
argumentParser.get("rabbitOngoingQueue"),
true,
false);
manager.close(); manager.close();
} catch (Throwable e) { } catch (Throwable e) {
throw new DnetCollectorException("Error on collecting ",e); throw new DnetCollectorException("Error on collecting ", e);
} }
} }
} }

View File

@ -1,6 +1,5 @@
package eu.dnetlib.dhp.collection.worker; package eu.dnetlib.dhp.collection.worker;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory; import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.MessageManager; import eu.dnetlib.message.MessageManager;
@ -8,16 +7,13 @@ import org.apache.commons.io.IOUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/**
 * DnetCollectorWorkerApplication is the main class responsible for starting the Dnet collection
 * into HDFS. This module is executed on the Hadoop cluster and takes as input the parameters that
 * tell it which collector plugin to use and the HDFS path where the collected data is stored.
 *
 * @author Sandro La Bruzzo
 */
public class DnetCollectorWorkerApplication { public class DnetCollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class); private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
@ -26,22 +22,27 @@ public class DnetCollectorWorkerApplication {
private static ArgumentApplicationParser argumentParser; private static ArgumentApplicationParser argumentParser;
/** @param args */
/**
* @param args
*/
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
argumentParser= new ArgumentApplicationParser(IOUtils.toString(DnetCollectorWorker.class.getResourceAsStream("/eu/dnetlib/collector/worker/collector_parameter.json"))); argumentParser =
new ArgumentApplicationParser(
IOUtils.toString(
DnetCollectorWorker.class.getResourceAsStream(
"/eu/dnetlib/collector/worker/collector_parameter.json")));
argumentParser.parseArgument(args); argumentParser.parseArgument(args);
log.info("hdfsPath =" + argumentParser.get("hdfsPath")); log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
log.info("json = " + argumentParser.get("apidescriptor")); log.info("json = " + argumentParser.get("apidescriptor"));
final MessageManager manager = new MessageManager(argumentParser.get("rabbitHost"), argumentParser.get("rabbitUser"), argumentParser.get("rabbitPassword"), false, false, null); final MessageManager manager =
final DnetCollectorWorker worker = new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager); new MessageManager(
argumentParser.get("rabbitHost"),
argumentParser.get("rabbitUser"),
argumentParser.get("rabbitPassword"),
false,
false,
null);
final DnetCollectorWorker worker =
new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
worker.collect(); worker.collect();
} }
} }

View File

@ -4,16 +4,15 @@ import java.util.LinkedList;
/**
 * Ordered list of the error messages gathered while retrying a download.
 *
 * <p>{@link #toString()} renders every entry together with its zero-based position so the whole
 * retry history can be embedded in a single exception message.
 */
public class CollectorPluginErrorLogList extends LinkedList<String> {

    private static final long serialVersionUID = -6925786561303289704L;

    /**
     * Renders the list as {@code "Retry #0: msg0 / Retry #1: msg1 / "}.
     *
     * @return the concatenated retry log, or an empty string when the list is empty
     */
    @Override
    public String toString() {
        // StringBuilder avoids re-copying the accumulated text on every iteration.
        final StringBuilder log = new StringBuilder();
        int index = 0;
        for (final String errorMessage : this) {
            log.append(String.format("Retry #%s: %s / ", index++, errorMessage));
        }
        return log.toString();
    }
}

View File

@ -4,19 +4,16 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException; import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
;
public class CollectorPluginFactory { public class CollectorPluginFactory {
public CollectorPlugin getPluginByProtocol(final String protocol) throws DnetCollectorException { public CollectorPlugin getPluginByProtocol(final String protocol)
if (protocol==null) throw new DnetCollectorException("protocol cannot be null"); throws DnetCollectorException {
switch (protocol.toLowerCase().trim()){ if (protocol == null) throw new DnetCollectorException("protocol cannot be null");
switch (protocol.toLowerCase().trim()) {
case "oai": case "oai":
return new OaiCollectorPlugin(); return new OaiCollectorPlugin();
default: default:
throw new DnetCollectorException("UNknown protocol"); throw new DnetCollectorException("UNknown protocol");
} }
} }
} }

View File

@ -1,15 +1,6 @@
package eu.dnetlib.dhp.collection.worker.utils; package eu.dnetlib.dhp.collection.worker.utils;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException; import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.net.*; import java.net.*;
@ -17,203 +8,243 @@ import java.security.GeneralSecurityException;
import java.security.cert.X509Certificate; import java.security.cert.X509Certificate;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class HttpConnector { public class HttpConnector {
private static final Log log = LogFactory.getLog(HttpConnector.class); private static final Log log = LogFactory.getLog(HttpConnector.class);
private int maxNumberOfRetry = 6; private int maxNumberOfRetry = 6;
private int defaultDelay = 120; // seconds private int defaultDelay = 120; // seconds
private int readTimeOut = 120; // seconds private int readTimeOut = 120; // seconds
private String responseType = null; private String responseType = null;
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
public HttpConnector() { public HttpConnector() {
CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL)); CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
} }
/** /**
* Given the URL returns the content via HTTP GET * Given the URL returns the content via HTTP GET
* *
* @param requestUrl * @param requestUrl the URL
* the URL * @return the content of the downloaded resource
* @return the content of the downloaded resource * @throws DnetCollectorException when retrying more than maxNumberOfRetry times
* @throws DnetCollectorException */
* when retrying more than maxNumberOfRetry times public String getInputSource(final String requestUrl) throws DnetCollectorException {
*/ return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
public String getInputSource(final String requestUrl) throws DnetCollectorException { }
return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
}
/** /**
* Given the URL returns the content as a stream via HTTP GET * Given the URL returns the content as a stream via HTTP GET
* *
* @param requestUrl * @param requestUrl the URL
* the URL * @return the content of the downloaded resource as InputStream
* @return the content of the downloaded resource as InputStream * @throws DnetCollectorException when retrying more than maxNumberOfRetry times
* @throws DnetCollectorException */
* when retrying more than maxNumberOfRetry times public InputStream getInputSourceAsStream(final String requestUrl)
*/ throws DnetCollectorException {
public InputStream getInputSourceAsStream(final String requestUrl) throws DnetCollectorException { return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); }
}
private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList) private String attemptDownlaodAsString(
throws DnetCollectorException { final String requestUrl,
try { final int retryNumber,
final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList()); final CollectorPluginErrorLogList errorList)
try { throws DnetCollectorException {
return IOUtils.toString(s); try {
} catch (final IOException e) { final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
log.error("error while retrieving from http-connection occured: " + requestUrl, e); try {
Thread.sleep(defaultDelay * 1000); return IOUtils.toString(s);
errorList.add(e.getMessage()); } catch (final IOException e) {
return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList); log.error("error while retrieving from http-connection occured: " + requestUrl, e);
} finally { Thread.sleep(defaultDelay * 1000);
IOUtils.closeQuietly(s); errorList.add(e.getMessage());
} return attemptDownlaodAsString(requestUrl, retryNumber + 1, errorList);
} catch (final InterruptedException e) { } finally {
throw new DnetCollectorException(e); IOUtils.closeQuietly(s);
} }
} } catch (final InterruptedException e) {
throw new DnetCollectorException(e);
}
}
private InputStream attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList) private InputStream attemptDownload(
throws DnetCollectorException { final String requestUrl,
final int retryNumber,
final CollectorPluginErrorLogList errorList)
throws DnetCollectorException {
if (retryNumber > maxNumberOfRetry) { throw new DnetCollectorException("Max number of retries exceeded. Cause: \n " + errorList); } if (retryNumber > maxNumberOfRetry) {
throw new DnetCollectorException(
"Max number of retries exceeded. Cause: \n " + errorList);
}
log.debug("Downloading " + requestUrl + " - try: " + retryNumber); log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
try { try {
InputStream input = null; InputStream input = null;
try { try {
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); final HttpURLConnection urlConn =
urlConn.setInstanceFollowRedirects(false); (HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setReadTimeout(readTimeOut * 1000); urlConn.setInstanceFollowRedirects(false);
urlConn.addRequestProperty("User-Agent", userAgent); urlConn.setReadTimeout(readTimeOut * 1000);
urlConn.addRequestProperty("User-Agent", userAgent);
if (log.isDebugEnabled()) { if (log.isDebugEnabled()) {
logHeaderFields(urlConn); logHeaderFields(urlConn);
} }
final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { if (retryAfter > 0
log.warn("waiting and repeating request after " + retryAfter + " sec."); && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
Thread.sleep(retryAfter * 1000); log.warn("waiting and repeating request after " + retryAfter + " sec.");
errorList.add("503 Service Unavailable"); Thread.sleep(retryAfter * 1000);
urlConn.disconnect(); errorList.add("503 Service Unavailable");
return attemptDownload(requestUrl, retryNumber + 1, errorList); urlConn.disconnect();
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM || urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) { return attemptDownload(requestUrl, retryNumber + 1, errorList);
final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); } else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
log.debug("The requested url has been moved to " + newUrl); || urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl)); final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
urlConn.disconnect(); log.debug("The requested url has been moved to " + newUrl);
return attemptDownload(newUrl, retryNumber + 1, errorList); errorList.add(
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) { String.format(
log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); "%s %s. Moved to: %s",
Thread.sleep(defaultDelay * 1000); urlConn.getResponseCode(),
errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage())); urlConn.getResponseMessage(),
urlConn.disconnect(); newUrl));
return attemptDownload(requestUrl, retryNumber + 1, errorList); urlConn.disconnect();
} else { return attemptDownload(newUrl, retryNumber + 1, errorList);
input = urlConn.getInputStream(); } else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
responseType = urlConn.getContentType(); log.error(
return input; String.format(
} "HTTP error: %s %s",
} catch (final IOException e) { urlConn.getResponseCode(), urlConn.getResponseMessage()));
log.error("error while retrieving from http-connection occured: " + requestUrl, e); Thread.sleep(defaultDelay * 1000);
Thread.sleep(defaultDelay * 1000); errorList.add(
errorList.add(e.getMessage()); String.format(
return attemptDownload(requestUrl, retryNumber + 1, errorList); "%s %s",
} urlConn.getResponseCode(), urlConn.getResponseMessage()));
} catch (final InterruptedException e) { urlConn.disconnect();
throw new DnetCollectorException(e); return attemptDownload(requestUrl, retryNumber + 1, errorList);
} } else {
} input = urlConn.getInputStream();
responseType = urlConn.getContentType();
return input;
}
} catch (final IOException e) {
log.error("error while retrieving from http-connection occured: " + requestUrl, e);
Thread.sleep(defaultDelay * 1000);
errorList.add(e.getMessage());
return attemptDownload(requestUrl, retryNumber + 1, errorList);
}
} catch (final InterruptedException e) {
throw new DnetCollectorException(e);
}
}
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: " + urlConn.getResponseMessage()); log.debug("StatusCode: " + urlConn.getResponseMessage());
for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) { for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) { if (e.getKey() != null) {
for (final String v : e.getValue()) { for (final String v : e.getValue()) {
log.debug(" key: " + e.getKey() + " - value: " + v); log.debug(" key: " + e.getKey() + " - value: " + v);
} }
} }
} }
} }
private int obtainRetryAfter(final Map<String, List<String>> headerMap) { private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (final String key : headerMap.keySet()) { for (final String key : headerMap.keySet()) {
if (key != null && key.toLowerCase().equals("retry-after") && headerMap.get(key).size() > 0 if (key != null
&& NumberUtils.isNumber(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; } && key.toLowerCase().equals("retry-after")
} && headerMap.get(key).size() > 0
return -1; && NumberUtils.isNumber(headerMap.get(key).get(0))) {
} return Integer.parseInt(headerMap.get(key).get(0)) + 10;
}
}
return -1;
}
private String obtainNewLocation(final Map<String, List<String>> headerMap) throws DnetCollectorException { private String obtainNewLocation(final Map<String, List<String>> headerMap)
for (final String key : headerMap.keySet()) { throws DnetCollectorException {
if (key != null && key.toLowerCase().equals("location") && headerMap.get(key).size() > 0) { return headerMap.get(key).get(0); } for (final String key : headerMap.keySet()) {
} if (key != null
throw new DnetCollectorException("The requested url has been MOVED, but 'location' param is MISSING"); && key.toLowerCase().equals("location")
} && headerMap.get(key).size() > 0) {
return headerMap.get(key).get(0);
}
}
throw new DnetCollectorException(
"The requested url has been MOVED, but 'location' param is MISSING");
}
/** /**
* register for https scheme; this is a workaround and not intended for the use in trusted environments * register for https scheme; this is a workaround and not intended for the use in trusted
*/ * environments
public void initTrustManager() { */
final X509TrustManager tm = new X509TrustManager() { public void initTrustManager() {
final X509TrustManager tm =
new X509TrustManager() {
@Override @Override
public void checkClientTrusted(final X509Certificate[] xcs, final String string) {} public void checkClientTrusted(
final X509Certificate[] xcs, final String string) {}
@Override @Override
public void checkServerTrusted(final X509Certificate[] xcs, final String string) {} public void checkServerTrusted(
final X509Certificate[] xcs, final String string) {}
@Override @Override
public X509Certificate[] getAcceptedIssuers() { public X509Certificate[] getAcceptedIssuers() {
return null; return null;
} }
}; };
try { try {
final SSLContext ctx = SSLContext.getInstance("TLS"); final SSLContext ctx = SSLContext.getInstance("TLS");
ctx.init(null, new TrustManager[] { tm }, null); ctx.init(null, new TrustManager[] {tm}, null);
HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory()); HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
} catch (final GeneralSecurityException e) { } catch (final GeneralSecurityException e) {
log.fatal(e); log.fatal(e);
throw new IllegalStateException(e); throw new IllegalStateException(e);
} }
} }
public int getMaxNumberOfRetry() { public int getMaxNumberOfRetry() {
return maxNumberOfRetry; return maxNumberOfRetry;
} }
public void setMaxNumberOfRetry(final int maxNumberOfRetry) { public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
this.maxNumberOfRetry = maxNumberOfRetry; this.maxNumberOfRetry = maxNumberOfRetry;
} }
public int getDefaultDelay() { public int getDefaultDelay() {
return defaultDelay; return defaultDelay;
} }
public void setDefaultDelay(final int defaultDelay) { public void setDefaultDelay(final int defaultDelay) {
this.defaultDelay = defaultDelay; this.defaultDelay = defaultDelay;
} }
public int getReadTimeOut() { public int getReadTimeOut() {
return readTimeOut; return readTimeOut;
} }
public void setReadTimeOut(final int readTimeOut) { public void setReadTimeOut(final int readTimeOut) {
this.readTimeOut = readTimeOut; this.readTimeOut = readTimeOut;
} }
public String getResponseType() {
return responseType;
}
public String getResponseType() {
return responseType;
}
} }

View File

@ -6,254 +6,392 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /** @author jochen, Andreas Czerniak */
* @author jochen, Andreas Czerniak
*
*/
public class XmlCleaner { public class XmlCleaner {
/** /** Pattern for numeric entities. */
* Pattern for numeric entities. private static Pattern validCharacterEntityPattern =
*/ Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$ // private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};"); //$NON-NLS-1$ // //$NON-NLS-1$
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11; // see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11;
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];"); private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
/** /**
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | * Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD
* [#x10000-#x10FFFF] * | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/ */
private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$ private static Pattern invalidCharacterPattern =
Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
// Map entities to their unicode equivalent // Map entities to their unicode equivalent
private static Set<String> goodEntities = new HashSet<>(); private static Set<String> goodEntities = new HashSet<>();
private static Map<String, String> badEntities = new HashMap<>(); private static Map<String, String> badEntities = new HashMap<>();
static { static {
// pre-defined XML entities // pre-defined XML entities
goodEntities.add("&quot;"); //$NON-NLS-1$ // quotation mark goodEntities.add("&quot;"); // $NON-NLS-1$ // quotation mark
goodEntities.add("&amp;"); //$NON-NLS-1$ // ampersand goodEntities.add("&amp;"); // $NON-NLS-1$ // ampersand
goodEntities.add("&lt;"); //$NON-NLS-1$ // less-than sign goodEntities.add("&lt;"); // $NON-NLS-1$ // less-than sign
goodEntities.add("&gt;"); //$NON-NLS-1$ // greater-than sign goodEntities.add("&gt;"); // $NON-NLS-1$ // greater-than sign
// control entities // control entities
// badEntities.put("&#11;", ""); // badEntities.put("&#11;", "");
badEntities.put("&#127;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#127;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#128;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#128;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#129;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#129;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#130;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#130;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#131;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#131;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#132;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#132;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#133;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#133;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#134;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#134;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#135;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#135;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#136;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#136;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#137;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#137;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#138;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#138;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#139;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#139;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#140;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#140;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#141;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#141;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#142;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#142;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#143;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#143;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#144;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#144;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#145;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#145;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#146;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#146;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#147;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#147;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#148;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#148;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#149;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#149;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#150;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#150;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#151;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#151;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#152;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#152;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#153;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#153;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#154;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#154;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#155;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#155;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#156;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#156;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#157;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#157;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#158;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#158;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#159;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character badEntities.put("&#159;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
// misc entities // misc entities
badEntities.put("&euro;", "\u20AC"); //$NON-NLS-1$ //$NON-NLS-2$ // euro badEntities.put("&euro;", "\u20AC"); // $NON-NLS-1$ //$NON-NLS-2$ // euro
badEntities.put("&lsquo;", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark badEntities.put(
badEntities.put("&rsquo;", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark "&lsquo;", "\u2018"); // $NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
// Latin 1 entities badEntities.put(
badEntities.put("&nbsp;", "\u00A0"); //$NON-NLS-1$ //$NON-NLS-2$ // no-break space "&rsquo;", "\u2019"); // $NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
badEntities.put("&iexcl;", "\u00A1"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark // Latin 1 entities
badEntities.put("&cent;", "\u00A2"); //$NON-NLS-1$ //$NON-NLS-2$ // cent sign badEntities.put("&nbsp;", "\u00A0"); // $NON-NLS-1$ //$NON-NLS-2$ // no-break space
badEntities.put("&pound;", "\u00A3"); //$NON-NLS-1$ //$NON-NLS-2$ // pound sign badEntities.put(
badEntities.put("&curren;", "\u00A4"); //$NON-NLS-1$ //$NON-NLS-2$ // currency sign "&iexcl;", "\u00A1"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
badEntities.put("&yen;", "\u00A5"); //$NON-NLS-1$ //$NON-NLS-2$ // yen sign badEntities.put("&cent;", "\u00A2"); // $NON-NLS-1$ //$NON-NLS-2$ // cent sign
badEntities.put("&brvbar;", "\u00A6"); //$NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar badEntities.put("&pound;", "\u00A3"); // $NON-NLS-1$ //$NON-NLS-2$ // pound sign
badEntities.put("&sect;", "\u00A7"); //$NON-NLS-1$ //$NON-NLS-2$ // section sign badEntities.put("&curren;", "\u00A4"); // $NON-NLS-1$ //$NON-NLS-2$ // currency sign
badEntities.put("&uml;", "\u00A8"); //$NON-NLS-1$ //$NON-NLS-2$ // diaeresis badEntities.put("&yen;", "\u00A5"); // $NON-NLS-1$ //$NON-NLS-2$ // yen sign
badEntities.put("&copy;", "\u00A9"); //$NON-NLS-1$ //$NON-NLS-2$ // copyright sign badEntities.put("&brvbar;", "\u00A6"); // $NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
badEntities.put("&ordf;", "\u00AA"); //$NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator badEntities.put("&sect;", "\u00A7"); // $NON-NLS-1$ //$NON-NLS-2$ // section sign
badEntities.put("&laquo;", "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark badEntities.put("&uml;", "\u00A8"); // $NON-NLS-1$ //$NON-NLS-2$ // diaeresis
badEntities.put("&not;", "\u00AC"); //$NON-NLS-1$ //$NON-NLS-2$ // not sign badEntities.put("&copy;", "\u00A9"); // $NON-NLS-1$ //$NON-NLS-2$ // copyright sign
badEntities.put("&shy;", "\u00AD"); //$NON-NLS-1$ //$NON-NLS-2$ // soft hyphen badEntities.put(
badEntities.put("&reg;", "\u00AE"); //$NON-NLS-1$ //$NON-NLS-2$ // registered sign "&ordf;", "\u00AA"); // $NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
badEntities.put("&macr;", "\u00AF"); //$NON-NLS-1$ //$NON-NLS-2$ // macron badEntities.put(
badEntities.put("&deg;", "\u00B0"); //$NON-NLS-1$ //$NON-NLS-2$ // degree sign "&laquo;",
badEntities.put("&plusmn;", "\u00B1"); //$NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
badEntities.put("&sup2;", "\u00B2"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript two badEntities.put("&not;", "\u00AC"); // $NON-NLS-1$ //$NON-NLS-2$ // not sign
badEntities.put("&sup3;", "\u00B3"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript three badEntities.put("&shy;", "\u00AD"); // $NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
badEntities.put("&acute;", "\u00B4"); //$NON-NLS-1$ //$NON-NLS-2$ // acute accent badEntities.put("&reg;", "\u00AE"); // $NON-NLS-1$ //$NON-NLS-2$ // registered sign
badEntities.put("&micro;", "\u00B5"); //$NON-NLS-1$ //$NON-NLS-2$ // micro sign badEntities.put("&macr;", "\u00AF"); // $NON-NLS-1$ //$NON-NLS-2$ // macron
badEntities.put("&para;", "\u00B6"); //$NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign badEntities.put("&deg;", "\u00B0"); // $NON-NLS-1$ //$NON-NLS-2$ // degree sign
badEntities.put("&middot;", "\u00B7"); //$NON-NLS-1$ //$NON-NLS-2$ // middle dot badEntities.put("&plusmn;", "\u00B1"); // $NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
badEntities.put("&cedil;", "\u00B8"); //$NON-NLS-1$ //$NON-NLS-2$ // cedilla badEntities.put("&sup2;", "\u00B2"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript two
badEntities.put("&sup1;", "\u00B9"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript one badEntities.put("&sup3;", "\u00B3"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript three
badEntities.put("&ordm;", "\u00BA"); //$NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator badEntities.put("&acute;", "\u00B4"); // $NON-NLS-1$ //$NON-NLS-2$ // acute accent
badEntities.put("&raquo;", "\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark badEntities.put("&micro;", "\u00B5"); // $NON-NLS-1$ //$NON-NLS-2$ // micro sign
badEntities.put("&frac14;", "\u00BC"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter badEntities.put("&para;", "\u00B6"); // $NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
badEntities.put("&frac12;", "\u00BD"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half badEntities.put("&middot;", "\u00B7"); // $NON-NLS-1$ //$NON-NLS-2$ // middle dot
badEntities.put("&frac34;", "\u00BE"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters badEntities.put("&cedil;", "\u00B8"); // $NON-NLS-1$ //$NON-NLS-2$ // cedilla
badEntities.put("&iquest;", "\u00BF"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted question mark badEntities.put("&sup1;", "\u00B9"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript one
badEntities.put("&Agrave;", "\u00C0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave badEntities.put(
badEntities.put("&Aacute;", "\u00C1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute "&ordm;", "\u00BA"); // $NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
badEntities.put("&Acirc;", "\u00C2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex badEntities.put(
badEntities.put("&Atilde;", "\u00C3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde "&raquo;",
badEntities.put("&Auml;", "\u00C4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis "\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
badEntities.put("&Aring;", "\u00C5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above badEntities.put(
badEntities.put("&AElig;", "\u00C6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE "&frac14;", "\u00BC"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
badEntities.put("&Ccedil;", "\u00C7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla badEntities.put(
badEntities.put("&Egrave;", "\u00C8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave "&frac12;", "\u00BD"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
badEntities.put("&Eacute;", "\u00C9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute badEntities.put(
badEntities.put("&Ecirc;", "\u00CA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex "&frac34;",
badEntities.put("&Euml;", "\u00CB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis "\u00BE"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
badEntities.put("&Igrave;", "\u00CC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave badEntities.put(
badEntities.put("&Iacute;", "\u00CD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute "&iquest;", "\u00BF"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
badEntities.put("&Icirc;", "\u00CE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex badEntities.put(
badEntities.put("&Iuml;", "\u00CF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis "&Agrave;",
badEntities.put("&ETH;", "\u00D0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH "\u00C0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
badEntities.put("&Ntilde;", "\u00D1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde badEntities.put(
badEntities.put("&Ograve;", "\u00D2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave "&Aacute;",
badEntities.put("&Oacute;", "\u00D3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute "\u00C1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
badEntities.put("&Ocirc;", "\u00D4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex badEntities.put(
badEntities.put("&Otilde;", "\u00D5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde "&Acirc;",
badEntities.put("&Ouml;", "\u00D6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis "\u00C2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
badEntities.put("&times;", "\u00D7"); //$NON-NLS-1$ //$NON-NLS-2$ // multiplication sign badEntities.put(
badEntities.put("&Oslash;", "\u00D8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke "&Atilde;",
badEntities.put("&Ugrave;", "\u00D9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave "\u00C3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
badEntities.put("&Uacute;", "\u00DA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute badEntities.put(
badEntities.put("&Ucirc;", "\u00DB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex "&Auml;",
badEntities.put("&Uuml;", "\u00DC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis "\u00C4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
badEntities.put("&Yacute;", "\u00DD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute badEntities.put(
badEntities.put("&THORN;", "\u00DE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN "&Aring;",
badEntities.put("&szlig;", "\u00DF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s "\u00C5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
badEntities.put("&agrave;", "\u00E0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave badEntities.put(
badEntities.put("&aacute;", "\u00E1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute "&AElig;", "\u00C6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
badEntities.put("&acirc;", "\u00E2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex badEntities.put(
badEntities.put("&atilde;", "\u00E3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde "&Ccedil;",
badEntities.put("&auml;", "\u00E4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis "\u00C7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
badEntities.put("&aring;", "\u00E5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above badEntities.put(
badEntities.put("&aelig;", "\u00E6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae "&Egrave;",
badEntities.put("&ccedil;", "\u00E7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla "\u00C8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
badEntities.put("&egrave;", "\u00E8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave badEntities.put(
badEntities.put("&eacute;", "\u00E9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute "&Eacute;",
badEntities.put("&ecirc;", "\u00EA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex "\u00C9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
badEntities.put("&euml;", "\u00EB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis badEntities.put(
badEntities.put("&igrave;", "\u00EC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave "&Ecirc;",
badEntities.put("&iacute;", "\u00ED"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute "\u00CA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
badEntities.put("&icirc;", "\u00EE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex badEntities.put(
badEntities.put("&iuml;", "\u00EF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis "&Euml;",
badEntities.put("&eth;", "\u00F0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth "\u00CB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
badEntities.put("&ntilde;", "\u00F1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde badEntities.put(
badEntities.put("&ograve;", "\u00F2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave "&Igrave;",
badEntities.put("&oacute;", "\u00F3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute "\u00CC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
badEntities.put("&ocirc;", "\u00F4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex badEntities.put(
badEntities.put("&otilde;", "\u00F5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde "&Iacute;",
badEntities.put("&ouml;", "\u00F6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis "\u00CD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
badEntities.put("&divide;", "\u00F7"); //$NON-NLS-1$ //$NON-NLS-2$ // division sign badEntities.put(
badEntities.put("&oslash;", "\u00F8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke "&Icirc;",
badEntities.put("&ugrave;", "\u00F9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave "\u00CE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
badEntities.put("&uacute;", "\u00FA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute badEntities.put(
badEntities.put("&ucirc;", "\u00FB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex "&Iuml;",
badEntities.put("&uuml;", "\u00FC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis "\u00CF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
badEntities.put("&yacute;", "\u00FD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute badEntities.put("&ETH;", "\u00D0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
badEntities.put("&thorn;", "\u00FE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn badEntities.put(
badEntities.put("&yuml;", "\u00FF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis "&Ntilde;",
} "\u00D1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
badEntities.put(
"&Ograve;",
"\u00D2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
badEntities.put(
"&Oacute;",
"\u00D3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
badEntities.put(
"&Ocirc;",
"\u00D4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
badEntities.put(
"&Otilde;",
"\u00D5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
badEntities.put(
"&Ouml;",
"\u00D6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
badEntities.put("&times;", "\u00D7"); // $NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
badEntities.put(
"&Oslash;",
"\u00D8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
badEntities.put(
"&Ugrave;",
"\u00D9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
badEntities.put(
"&Uacute;",
"\u00DA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
badEntities.put(
"&Ucirc;",
"\u00DB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
badEntities.put(
"&Uuml;",
"\u00DC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
badEntities.put(
"&Yacute;",
"\u00DD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
badEntities.put(
"&THORN;", "\u00DE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
badEntities.put(
"&szlig;", "\u00DF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
badEntities.put(
"&agrave;",
"\u00E0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
badEntities.put(
"&aacute;",
"\u00E1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
badEntities.put(
"&acirc;",
"\u00E2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
badEntities.put(
"&atilde;",
"\u00E3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
badEntities.put(
"&auml;",
"\u00E4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
badEntities.put(
"&aring;",
"\u00E5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
badEntities.put("&aelig;", "\u00E6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
badEntities.put(
"&ccedil;",
"\u00E7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
badEntities.put(
"&egrave;",
"\u00E8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
badEntities.put(
"&eacute;",
"\u00E9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
badEntities.put(
"&ecirc;",
"\u00EA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
badEntities.put(
"&euml;",
"\u00EB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
badEntities.put(
"&igrave;",
"\u00EC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
badEntities.put(
"&iacute;",
"\u00ED"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
badEntities.put(
"&icirc;",
"\u00EE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
badEntities.put(
"&iuml;",
"\u00EF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
badEntities.put("&eth;", "\u00F0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
badEntities.put(
"&ntilde;",
"\u00F1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
badEntities.put(
"&ograve;",
"\u00F2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
badEntities.put(
"&oacute;",
"\u00F3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
badEntities.put(
"&ocirc;",
"\u00F4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
badEntities.put(
"&otilde;",
"\u00F5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
badEntities.put(
"&ouml;",
"\u00F6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
badEntities.put("&divide;", "\u00F7"); // $NON-NLS-1$ //$NON-NLS-2$ // division sign
badEntities.put(
"&oslash;",
"\u00F8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
badEntities.put(
"&ugrave;",
"\u00F9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
badEntities.put(
"&uacute;",
"\u00FA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
badEntities.put(
"&ucirc;",
"\u00FB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
badEntities.put(
"&uuml;",
"\u00FC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
badEntities.put(
"&yacute;",
"\u00FD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
badEntities.put(
"&thorn;", "\u00FE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
badEntities.put(
"&yuml;",
"\u00FF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
}
/** /**
* For each entity in the input that is not allowed in XML, replace the entity with its unicode equivalent or remove it. For each * For each entity in the input that is not allowed in XML, replace the entity with its unicode
* instance of a bare {@literal &}, replace it with {@literal &amp;<br/> * equivalent or remove it. For each instance of a bare {@literal &}, replace it with {@literal
* } XML only allows 4 entities: {@literal &amp;amp;}, {@literal &amp;quot;}, {@literal &amp;lt;} and {@literal &amp;gt;}. * &amp;<br/> } XML only allows 4 entities: {@literal &amp;amp;}, {@literal &amp;quot;},
* * {@literal &amp;lt;} and {@literal &amp;gt;}.
* @param broken *
* the string to handle entities * @param broken the string to handle entities
* @return the string with entities appropriately fixed up * @return the string with entities appropriately fixed up
*/ */
static public String cleanAllEntities(final String broken) { public static String cleanAllEntities(final String broken) {
if (broken == null) { return null; } if (broken == null) {
return null;
}
String working = invalidControlCharPattern.matcher(broken).replaceAll(""); String working = invalidControlCharPattern.matcher(broken).replaceAll("");
working = invalidCharacterPattern.matcher(working).replaceAll(""); working = invalidCharacterPattern.matcher(working).replaceAll("");
int cleanfrom = 0; int cleanfrom = 0;
while (true) { while (true) {
int amp = working.indexOf('&', cleanfrom); int amp = working.indexOf('&', cleanfrom);
// If there are no more amps then we are done // If there are no more amps then we are done
if (amp == -1) { if (amp == -1) {
break; break;
} }
// Skip references of the kind &#ddd; // Skip references of the kind &#ddd;
if (validCharacterEntityPattern.matcher(working.substring(amp)).find()) { if (validCharacterEntityPattern.matcher(working.substring(amp)).find()) {
cleanfrom = working.indexOf(';', amp) + 1; cleanfrom = working.indexOf(';', amp) + 1;
continue; continue;
} }
int i = amp + 1; int i = amp + 1;
while (true) { while (true) {
// if we are at the end of the string then just escape the '&'; // if we are at the end of the string then just escape the '&';
if (i >= working.length()) { return working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$ if (i >= working.length()) {
} return working.substring(0, amp)
// if we have come to a ; then we have an entity + "&amp;"
// If it is something that xml can't handle then replace it. + working.substring(amp + 1); // $NON-NLS-1$
final char c = working.charAt(i); }
if (c == ';') { // if we have come to a ; then we have an entity
final String entity = working.substring(amp, i + 1); // If it is something that xml can't handle then replace it.
final String replace = handleEntity(entity); final char c = working.charAt(i);
working = working.substring(0, amp) + replace + working.substring(i + 1); if (c == ';') {
break; final String entity = working.substring(amp, i + 1);
} final String replace = handleEntity(entity);
// Did we end an entity without finding a closing ; working = working.substring(0, amp) + replace + working.substring(i + 1);
// Then treat it as an '&' that needs to be replaced with &amp; break;
if (!Character.isLetterOrDigit(c)) { }
working = working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$ // Did we end an entity without finding a closing ;
amp = i + 4; // account for the 4 extra characters // Then treat it as an '&' that needs to be replaced with &amp;
break; if (!Character.isLetterOrDigit(c)) {
} working =
i++; working.substring(0, amp)
} + "&amp;"
cleanfrom = amp + 1; + working.substring(amp + 1); // $NON-NLS-1$
} amp = i + 4; // account for the 4 extra characters
break;
}
i++;
}
cleanfrom = amp + 1;
}
if (Pattern.compile("<<").matcher(working).find()) { if (Pattern.compile("<<").matcher(working).find()) {
working = working.replaceAll("<<", "&lt;&lt;"); working = working.replaceAll("<<", "&lt;&lt;");
} }
if (Pattern.compile(">>").matcher(working).find()) { if (Pattern.compile(">>").matcher(working).find()) {
working = working.replaceAll(">>", "&gt;&gt;"); working = working.replaceAll(">>", "&gt;&gt;");
} }
return working; return working;
} }
/** /**
* Replace entity with its unicode equivalent, if it is not a valid XML entity. Otherwise strip it out. XML only allows 4 entities: * Replace entity with its unicode equivalent, if it is not a valid XML entity. Otherwise strip
* &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;. * it out. XML only allows 4 entities: &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;.
* *
* @param entity * @param entity the entity to be replaced
* the entity to be replaced * @return the substitution for the entity, either itself, the unicode equivalent or an empty
* @return the substitution for the entity, either itself, the unicode equivalent or an empty string. * string.
*/ */
private static String handleEntity(final String entity) { private static String handleEntity(final String entity) {
if (goodEntities.contains(entity)) { return entity; } if (goodEntities.contains(entity)) {
return entity;
}
final String replace = badEntities.get(entity); final String replace = badEntities.get(entity);
if (replace != null) { return replace; } if (replace != null) {
return replace;
}
return replace != null ? replace : ""; return replace != null ? replace : "";
} }
} }

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.migration.actions; package eu.dnetlib.dhp.migration.actions;
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
import java.util.Comparator; import java.util.Comparator;
public class LicenseComparator implements Comparator<Qualifier> { public class LicenseComparator implements Comparator<Qualifier> {
@ -45,5 +44,4 @@ public class LicenseComparator implements Comparator<Qualifier> {
// Else (but unlikely), lexicographical ordering will do. // Else (but unlikely), lexicographical ordering will do.
return lClass.compareTo(rClass); return lClass.compareTo(rClass);
} }
} }

View File

@ -6,6 +6,11 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -17,12 +22,6 @@ import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions; import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.util.ToolRunner;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;
public class MigrateActionSet { public class MigrateActionSet {
private static final Log log = LogFactory.getLog(MigrateActionSet.class); private static final Log log = LogFactory.getLog(MigrateActionSet.class);
@ -34,9 +33,11 @@ public class MigrateActionSet {
private static Boolean DEFAULT_TRANSFORM_ONLY = false; private static Boolean DEFAULT_TRANSFORM_ONLY = false;
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser =
IOUtils.toString(MigrateActionSet.class.getResourceAsStream( new ArgumentApplicationParser(
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json"))); IOUtils.toString(
MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
parser.parseArgument(args); parser.parseArgument(args);
new MigrateActionSet().run(parser); new MigrateActionSet().run(parser);
@ -47,7 +48,7 @@ public class MigrateActionSet {
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
final String sourceNN = parser.get("sourceNameNode"); final String sourceNN = parser.get("sourceNameNode");
final String targetNN = parser.get("targetNameNode"); final String targetNN = parser.get("targetNameNode");
final String workDir = parser.get("workingDirectory"); final String workDir = parser.get("workingDirectory");
final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps")); final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
final String distcp_memory_mb = parser.get("distcp_memory_mb"); final String distcp_memory_mb = parser.get("distcp_memory_mb");
@ -63,10 +64,12 @@ public class MigrateActionSet {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps); Configuration conf =
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
FileSystem targetFS = FileSystem.get(conf); FileSystem targetFS = FileSystem.get(conf);
Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps); Configuration sourceConf =
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN); sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
FileSystem sourceFS = FileSystem.get(sourceConf); FileSystem sourceFS = FileSystem.get(sourceConf);
@ -75,14 +78,20 @@ public class MigrateActionSet {
List<Path> targetPaths = new ArrayList<>(); List<Path> targetPaths = new ArrayList<>();
final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp); final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
log.info(String.format("paths to process:\n%s", sourcePaths.stream().map(p -> p.toString()).collect(Collectors.joining("\n")))); log.info(
for(Path source : sourcePaths) { String.format(
"paths to process:\n%s",
sourcePaths.stream()
.map(p -> p.toString())
.collect(Collectors.joining("\n"))));
for (Path source : sourcePaths) {
if (!sourceFS.exists(source)) { if (!sourceFS.exists(source)) {
log.warn(String.format("skipping unexisting path: %s", source)); log.warn(String.format("skipping unexisting path: %s", source));
} else { } else {
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath())); LinkedList<String> pathQ =
Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
final String rawSet = pathQ.pollLast(); final String rawSet = pathQ.pollLast();
log.info(String.format("got RAWSET: %s", rawSet)); log.info(String.format("got RAWSET: %s", rawSet));
@ -91,7 +100,14 @@ public class MigrateActionSet {
final String actionSetDirectory = pathQ.pollLast(); final String actionSetDirectory = pathQ.pollLast();
final Path targetPath = new Path(targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet); final Path targetPath =
new Path(
targetNN
+ workDir
+ SEPARATOR
+ actionSetDirectory
+ SEPARATOR
+ rawSet);
log.info(String.format("using TARGET PATH: %s", targetPath)); log.info(String.format("using TARGET PATH: %s", targetPath));
@ -99,7 +115,13 @@ public class MigrateActionSet {
if (targetFS.exists(targetPath)) { if (targetFS.exists(targetPath)) {
targetFS.delete(targetPath, true); targetFS.delete(targetPath, true);
} }
runDistcp(distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath); runDistcp(
distcp_num_maps,
distcp_memory_mb,
distcp_task_timeout,
conf,
source,
targetPath);
} }
targetPaths.add(targetPath); targetPaths.add(targetPath);
@ -107,19 +129,25 @@ public class MigrateActionSet {
} }
} }
props.setProperty(TARGET_PATHS, targetPaths props.setProperty(
.stream() TARGET_PATHS,
.map(p -> p.toString()) targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(",")));
.collect(Collectors.joining(",")));
File file = new File(System.getProperty("oozie.action.output.properties")); File file = new File(System.getProperty("oozie.action.output.properties"));
try(OutputStream os = new FileOutputStream(file)) { try (OutputStream os = new FileOutputStream(file)) {
props.store(os, ""); props.store(os, "");
} }
System.out.println(file.getAbsolutePath()); System.out.println(file.getAbsolutePath());
} }
private void runDistcp(Integer distcp_num_maps, String distcp_memory_mb, String distcp_task_timeout, Configuration conf, Path source, Path targetPath) throws Exception { private void runDistcp(
Integer distcp_num_maps,
String distcp_memory_mb,
String distcp_task_timeout,
Configuration conf,
Path source,
Path targetPath)
throws Exception {
final DistCpOptions op = new DistCpOptions(source, targetPath); final DistCpOptions op = new DistCpOptions(source, targetPath);
op.setMaxMaps(distcp_num_maps); op.setMaxMaps(distcp_num_maps);
@ -127,20 +155,25 @@ public class MigrateActionSet {
op.preserve(DistCpOptions.FileAttribute.REPLICATION); op.preserve(DistCpOptions.FileAttribute.REPLICATION);
op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE); op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
int res = ToolRunner.run(new DistCp(conf, op), new String[]{ int res =
"-Dmapred.task.timeout=" + distcp_task_timeout, ToolRunner.run(
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb, new DistCp(conf, op),
"-pb", new String[] {
"-m " + distcp_num_maps, "-Dmapred.task.timeout=" + distcp_task_timeout,
source.toString(), "-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
targetPath.toString()}); "-pb",
"-m " + distcp_num_maps,
source.toString(),
targetPath.toString()
});
if (res != 0) { if (res != 0) {
throw new RuntimeException(String.format("distcp exited with code %s", res)); throw new RuntimeException(String.format("distcp exited with code %s", res));
} }
} }
private Configuration getConfiguration(String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) { private Configuration getConfiguration(
String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
final Configuration conf = new Configuration(); final Configuration conf = new Configuration();
conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout); conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout); conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
@ -151,20 +184,20 @@ public class MigrateActionSet {
return conf; return conf;
} }
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp) throws ISLookUpException { private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
String XQUERY = "distinct-values(\n" + throws ISLookUpException {
"let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n" + String XQUERY =
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n" + "distinct-values(\n"
"let $setDir := $x//SET/@directory/string()\n" + + "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
"let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n" + + "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
"return concat($basePath, '/', $setDir, '/', $rawSet))"; + "let $setDir := $x//SET/@directory/string()\n"
+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
log.info(String.format("running xquery:\n%s", XQUERY)); log.info(String.format("running xquery:\n%s", XQUERY));
return isLookUp.quickSearchProfile(XQUERY) return isLookUp.quickSearchProfile(XQUERY).stream()
.stream()
.map(p -> sourceNN + p) .map(p -> sourceNN + p)
.map(Path::new) .map(Path::new)
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
} }

View File

@ -4,12 +4,11 @@ import com.google.common.collect.Lists;
import com.googlecode.protobuf.format.JsonFormat; import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.*; import eu.dnetlib.data.proto.*;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
public class ProtoConverter implements Serializable { public class ProtoConverter implements Serializable {
@ -42,10 +41,12 @@ public class ProtoConverter implements Serializable {
rel.setRelType(r.getRelType().toString()); rel.setRelType(r.getRelType().toString());
rel.setSubRelType(r.getSubRelType().toString()); rel.setSubRelType(r.getSubRelType().toString());
rel.setRelClass(r.getRelClass()); rel.setRelClass(r.getRelClass());
rel.setCollectedFrom(r.getCollectedfromCount() > 0 ? rel.setCollectedfrom(
r.getCollectedfromList().stream() r.getCollectedfromCount() > 0
.map(kv -> mapKV(kv)) ? r.getCollectedfromList().stream()
.collect(Collectors.toList()) : null); .map(kv -> mapKV(kv))
.collect(Collectors.toList())
: null);
return rel; return rel;
} }
@ -71,8 +72,7 @@ public class ProtoConverter implements Serializable {
final ResultProtos.Result r = oaf.getEntity().getResult(); final ResultProtos.Result r = oaf.getEntity().getResult();
if (r.getInstanceCount() > 0) { if (r.getInstanceCount() > 0) {
return r.getInstanceList() return r.getInstanceList().stream()
.stream()
.map(i -> convertInstance(i)) .map(i -> convertInstance(i))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@ -96,15 +96,16 @@ public class ProtoConverter implements Serializable {
} }
private static Organization convertOrganization(OafProtos.Oaf oaf) { private static Organization convertOrganization(OafProtos.Oaf oaf) {
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata(); final OrganizationProtos.Organization.Metadata m =
oaf.getEntity().getOrganization().getMetadata();
final Organization org = setOaf(new Organization(), oaf); final Organization org = setOaf(new Organization(), oaf);
setEntity(org, oaf); setEntity(org, oaf);
org.setLegalshortname(mapStringField(m.getLegalshortname())); org.setLegalshortname(mapStringField(m.getLegalshortname()));
org.setLegalname(mapStringField(m.getLegalname())); org.setLegalname(mapStringField(m.getLegalname()));
org.setAlternativeNames(m.getAlternativeNamesList(). org.setAlternativeNames(
stream() m.getAlternativeNamesList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
org.setWebsiteurl(mapStringField(m.getWebsiteurl())); org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
org.setLogourl(mapStringField(m.getLogourl())); org.setLogourl(mapStringField(m.getLogourl()));
org.setEclegalbody(mapStringField(m.getEclegalbody())); org.setEclegalbody(mapStringField(m.getEclegalbody()));
@ -112,7 +113,8 @@ public class ProtoConverter implements Serializable {
org.setEcnonprofit(mapStringField(m.getEcnonprofit())); org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
org.setEcresearchorganization(mapStringField(m.getEcresearchorganization())); org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
org.setEchighereducation(mapStringField(m.getEchighereducation())); org.setEchighereducation(mapStringField(m.getEchighereducation()));
org.setEcinternationalorganizationeurinterests(mapStringField(m.getEcinternationalorganizationeurinterests())); org.setEcinternationalorganizationeurinterests(
mapStringField(m.getEcinternationalorganizationeurinterests()));
org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization())); org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
org.setEcenterprise(mapStringField(m.getEcenterprise())); org.setEcenterprise(mapStringField(m.getEcenterprise()));
org.setEcsmevalidated(mapStringField(m.getEcsmevalidated())); org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
@ -123,13 +125,14 @@ public class ProtoConverter implements Serializable {
} }
private static Datasource convertDataSource(OafProtos.Oaf oaf) { private static Datasource convertDataSource(OafProtos.Oaf oaf) {
final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata(); final DatasourceProtos.Datasource.Metadata m =
oaf.getEntity().getDatasource().getMetadata();
final Datasource datasource = setOaf(new Datasource(), oaf); final Datasource datasource = setOaf(new Datasource(), oaf);
setEntity(datasource, oaf); setEntity(datasource, oaf);
datasource.setAccessinfopackage(m.getAccessinfopackageList() datasource.setAccessinfopackage(
.stream() m.getAccessinfopackageList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
datasource.setCertificates(mapStringField(m.getCertificates())); datasource.setCertificates(mapStringField(m.getCertificates()));
datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl())); datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
datasource.setContactemail(mapStringField(m.getContactemail())); datasource.setContactemail(mapStringField(m.getContactemail()));
@ -148,37 +151,36 @@ public class ProtoConverter implements Serializable {
datasource.setLogourl(mapStringField(m.getLogourl())); datasource.setLogourl(mapStringField(m.getLogourl()));
datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl())); datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix())); datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
datasource.setOdcontenttypes(m.getOdcontenttypesList() datasource.setOdcontenttypes(
.stream() m.getOdcontenttypesList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
datasource.setOdlanguages(m.getOdlanguagesList() datasource.setOdlanguages(
.stream() m.getOdlanguagesList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems())); datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate())); datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate()));
datasource.setOdpolicies(mapStringField(m.getOdpolicies())); datasource.setOdpolicies(mapStringField(m.getOdpolicies()));
datasource.setOfficialname(mapStringField(m.getOfficialname())); datasource.setOfficialname(mapStringField(m.getOfficialname()));
datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility())); datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
datasource.setPidsystems(mapStringField(m.getPidsystems())); datasource.setPidsystems(mapStringField(m.getPidsystems()));
datasource.setPolicies(m.getPoliciesList() datasource.setPolicies(
.stream() m.getPoliciesList().stream()
.map(ProtoConverter::mapKV) .map(ProtoConverter::mapKV)
.collect(Collectors.toList())); .collect(Collectors.toList()));
datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind())); datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
datasource.setReleaseenddate(mapStringField(m.getReleaseenddate())); datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
datasource.setServiceprovider(mapBoolField(m.getServiceprovider())); datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
datasource.setReleasestartdate(mapStringField(m.getReleasestartdate())); datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
datasource.setSubjects(m.getSubjectsList() datasource.setSubjects(
.stream() m.getSubjectsList().stream()
.map(ProtoConverter::mapStructuredProperty) .map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList())); .collect(Collectors.toList()));
datasource.setVersioning(mapBoolField(m.getVersioning())); datasource.setVersioning(mapBoolField(m.getVersioning()));
datasource.setWebsiteurl(mapStringField(m.getWebsiteurl())); datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
datasource.setJournal(mapJournal(m.getJournal())); datasource.setJournal(mapJournal(m.getJournal()));
return datasource; return datasource;
} }
@ -204,14 +206,16 @@ public class ProtoConverter implements Serializable {
project.setFundedamount(m.getFundedamount()); project.setFundedamount(m.getFundedamount());
project.setTotalcost(m.getTotalcost()); project.setTotalcost(m.getTotalcost());
project.setKeywords(mapStringField(m.getKeywords())); project.setKeywords(mapStringField(m.getKeywords()));
project.setSubjects(m.getSubjectsList().stream() project.setSubjects(
.map(sp -> mapStructuredProperty(sp)) m.getSubjectsList().stream()
.collect(Collectors.toList())); .map(sp -> mapStructuredProperty(sp))
.collect(Collectors.toList()));
project.setTitle(mapStringField(m.getTitle())); project.setTitle(mapStringField(m.getTitle()));
project.setWebsiteurl(mapStringField(m.getWebsiteurl())); project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
project.setFundingtree(m.getFundingtreeList().stream() project.setFundingtree(
.map(f -> mapStringField(f)) m.getFundingtreeList().stream()
.collect(Collectors.toList())); .map(f -> mapStringField(f))
.collect(Collectors.toList()));
project.setJsonextrainfo(mapStringField(m.getJsonextrainfo())); project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
project.setSummary(mapStringField(m.getSummary())); project.setSummary(mapStringField(m.getSummary()));
project.setOptional1(mapStringField(m.getOptional1())); project.setOptional1(mapStringField(m.getOptional1()));
@ -242,14 +246,14 @@ public class ProtoConverter implements Serializable {
setEntity(software, oaf); setEntity(software, oaf);
setResult(software, oaf); setResult(software, oaf);
software.setDocumentationUrl(m.getDocumentationUrlList() software.setDocumentationUrl(
.stream() m.getDocumentationUrlList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
software.setLicense(m.getLicenseList() software.setLicense(
.stream() m.getLicenseList().stream()
.map(ProtoConverter::mapStructuredProperty) .map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList())); .collect(Collectors.toList()));
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl())); software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage())); software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage()));
return software; return software;
@ -260,18 +264,18 @@ public class ProtoConverter implements Serializable {
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf); OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
setEntity(otherResearchProducts, oaf); setEntity(otherResearchProducts, oaf);
setResult(otherResearchProducts, oaf); setResult(otherResearchProducts, oaf);
otherResearchProducts.setContactperson(m.getContactpersonList() otherResearchProducts.setContactperson(
.stream() m.getContactpersonList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
otherResearchProducts.setContactgroup(m.getContactgroupList() otherResearchProducts.setContactgroup(
.stream() m.getContactgroupList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
otherResearchProducts.setTool(m.getToolList() otherResearchProducts.setTool(
.stream() m.getToolList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
return otherResearchProducts; return otherResearchProducts;
} }
@ -298,12 +302,11 @@ public class ProtoConverter implements Serializable {
dataset.setVersion(mapStringField(m.getVersion())); dataset.setVersion(mapStringField(m.getVersion()));
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate())); dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber())); dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
dataset.setGeolocation(m.getGeolocationList() dataset.setGeolocation(
.stream() m.getGeolocationList().stream()
.map(ProtoConverter::mapGeolocation) .map(ProtoConverter::mapGeolocation)
.collect(Collectors.toList())); .collect(Collectors.toList()));
return dataset; return dataset;
} }
public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) { public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
@ -313,100 +316,103 @@ public class ProtoConverter implements Serializable {
} }
public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) { public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
//setting Entity fields // setting Entity fields
final OafProtos.OafEntity e = oaf.getEntity(); final OafProtos.OafEntity e = oaf.getEntity();
entity.setId(e.getId()); entity.setId(e.getId());
entity.setOriginalId(e.getOriginalIdList()); entity.setOriginalId(e.getOriginalIdList());
entity.setCollectedfrom(e.getCollectedfromList() entity.setCollectedfrom(
.stream() e.getCollectedfromList().stream()
.map(ProtoConverter::mapKV) .map(ProtoConverter::mapKV)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setPid(e.getPidList().stream() entity.setPid(
.map(ProtoConverter::mapStructuredProperty) e.getPidList().stream()
.collect(Collectors.toList())); .map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setDateofcollection(e.getDateofcollection()); entity.setDateofcollection(e.getDateofcollection());
entity.setDateoftransformation(e.getDateoftransformation()); entity.setDateoftransformation(e.getDateoftransformation());
entity.setExtraInfo(e.getExtraInfoList() entity.setExtraInfo(
.stream() e.getExtraInfoList().stream()
.map(ProtoConverter::mapExtraInfo) .map(ProtoConverter::mapExtraInfo)
.collect(Collectors.toList())); .collect(Collectors.toList()));
return entity; return entity;
} }
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) { public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
//setting Entity fields // setting Entity fields
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata(); final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
entity.setAuthor(m.getAuthorList() entity.setAuthor(
.stream() m.getAuthorList().stream()
.map(ProtoConverter::mapAuthor) .map(ProtoConverter::mapAuthor)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setResulttype(mapQualifier(m.getResulttype())); entity.setResulttype(mapQualifier(m.getResulttype()));
entity.setLanguage(mapQualifier(m.getLanguage())); entity.setLanguage(mapQualifier(m.getLanguage()));
entity.setCountry(m.getCountryList() entity.setCountry(
.stream() m.getCountryList().stream()
.map(ProtoConverter::mapQualifierAsCountry) .map(ProtoConverter::mapQualifierAsCountry)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setSubject(m.getSubjectList() entity.setSubject(
.stream() m.getSubjectList().stream()
.map(ProtoConverter::mapStructuredProperty) .map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setTitle(m.getTitleList() entity.setTitle(
.stream() m.getTitleList().stream()
.map(ProtoConverter::mapStructuredProperty) .map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setRelevantdate(m.getRelevantdateList() entity.setRelevantdate(
.stream() m.getRelevantdateList().stream()
.map(ProtoConverter::mapStructuredProperty) .map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setDescription(m.getDescriptionList() entity.setDescription(
.stream() m.getDescriptionList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setDateofacceptance(mapStringField(m.getDateofacceptance())); entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
entity.setPublisher(mapStringField(m.getPublisher())); entity.setPublisher(mapStringField(m.getPublisher()));
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate())); entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
entity.setSource(m.getSourceList() entity.setSource(
.stream() m.getSourceList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setFulltext(m.getFulltextList() entity.setFulltext(
.stream() m.getFulltextList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setFormat(m.getFormatList() entity.setFormat(
.stream() m.getFormatList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setContributor(m.getContributorList() entity.setContributor(
.stream() m.getContributorList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setResourcetype(mapQualifier(m.getResourcetype())); entity.setResourcetype(mapQualifier(m.getResourcetype()));
entity.setCoverage(m.getCoverageList() entity.setCoverage(
.stream() m.getCoverageList().stream()
.map(ProtoConverter::mapStringField) .map(ProtoConverter::mapStringField)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setContext(m.getContextList() entity.setContext(
.stream() m.getContextList().stream()
.map(ProtoConverter::mapContext) .map(ProtoConverter::mapContext)
.collect(Collectors.toList())); .collect(Collectors.toList()));
entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList())); entity.setBestaccessright(
getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
return entity; return entity;
} }
private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) { private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
if (instanceList != null) { if (instanceList != null) {
final Optional<FieldTypeProtos.Qualifier> min = instanceList.stream() final Optional<FieldTypeProtos.Qualifier> min =
.map(i -> i.getAccessright()).min(new LicenseComparator()); instanceList.stream().map(i -> i.getAccessright()).min(new LicenseComparator());
final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier(); final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
if (StringUtils.isBlank(rights.getClassid())) { if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN); rights.setClassid(UNKNOWN);
} }
if (StringUtils.isBlank(rights.getClassname()) || UNKNOWN.equalsIgnoreCase(rights.getClassname())) { if (StringUtils.isBlank(rights.getClassname())
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
rights.setClassname(NOT_AVAILABLE); rights.setClassname(NOT_AVAILABLE);
} }
if (StringUtils.isBlank(rights.getSchemeid())) { if (StringUtils.isBlank(rights.getSchemeid())) {
@ -425,14 +431,13 @@ public class ProtoConverter implements Serializable {
final Context entity = new Context(); final Context entity = new Context();
entity.setId(context.getId()); entity.setId(context.getId());
entity.setDataInfo(context.getDataInfoList() entity.setDataInfo(
.stream() context.getDataInfoList().stream()
.map(ProtoConverter::mapDataInfo) .map(ProtoConverter::mapDataInfo)
.collect(Collectors.toList())); .collect(Collectors.toList()));
return entity; return entity;
} }
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) { public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
final KeyValue keyValue = new KeyValue(); final KeyValue keyValue = new KeyValue();
keyValue.setKey(kv.getKey()); keyValue.setKey(kv.getKey());
@ -495,7 +500,8 @@ public class ProtoConverter implements Serializable {
return entity; return entity;
} }
public static OriginDescription mapOriginalDescription(FieldTypeProtos.OAIProvenance.OriginDescription originDescription) { public static OriginDescription mapOriginalDescription(
FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
final OriginDescription originDescriptionResult = new OriginDescription(); final OriginDescription originDescriptionResult = new OriginDescription();
originDescriptionResult.setHarvestDate(originDescription.getHarvestDate()); originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
originDescriptionResult.setAltered(originDescription.getAltered()); originDescriptionResult.setAltered(originDescription.getAltered());
@ -550,24 +556,24 @@ public class ProtoConverter implements Serializable {
entity.setName(author.getName()); entity.setName(author.getName());
entity.setSurname(author.getSurname()); entity.setSurname(author.getSurname());
entity.setRank(author.getRank()); entity.setRank(author.getRank());
entity.setPid(author.getPidList() entity.setPid(
.stream() author.getPidList().stream()
.map(kv -> { .map(
final StructuredProperty sp = new StructuredProperty(); kv -> {
sp.setValue(kv.getValue()); final StructuredProperty sp = new StructuredProperty();
final Qualifier q = new Qualifier(); sp.setValue(kv.getValue());
q.setClassid(kv.getKey()); final Qualifier q = new Qualifier();
q.setClassname(kv.getKey()); q.setClassid(kv.getKey());
sp.setQualifier(q); q.setClassname(kv.getKey());
return sp; sp.setQualifier(q);
}) return sp;
.collect(Collectors.toList())); })
entity.setAffiliation(author.getAffiliationList() .collect(Collectors.toList()));
.stream() entity.setAffiliation(
.map(ProtoConverter::mapStringField) author.getAffiliationList().stream()
.collect(Collectors.toList())); .map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
return entity; return entity;
} }
public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) { public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {

View File

@ -5,14 +5,17 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.data.proto.OafProtos; import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.io.IOException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.Objects;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -21,16 +24,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.io.IOException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.Objects;
public class TransformActions implements Serializable { public class TransformActions implements Serializable {
@ -38,9 +34,11 @@ public class TransformActions implements Serializable {
private static final String SEPARATOR = "/"; private static final String SEPARATOR = "/";
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser =
IOUtils.toString(MigrateActionSet.class.getResourceAsStream( new ArgumentApplicationParser(
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json"))); IOUtils.toString(
MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
parser.parseArgument(args); parser.parseArgument(args);
new TransformActions().run(parser); new TransformActions().run(parser);
@ -51,7 +49,7 @@ public class TransformActions implements Serializable {
final String isLookupUrl = parser.get("isLookupUrl"); final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: " + isLookupUrl); log.info("isLookupUrl: " + isLookupUrl);
final String inputPaths = parser.get("inputPaths"); final String inputPaths = parser.get("inputPaths");
if (StringUtils.isBlank(inputPaths)) { if (StringUtils.isBlank(inputPaths)) {
throw new RuntimeException("empty inputPaths"); throw new RuntimeException("empty inputPaths");
@ -60,18 +58,25 @@ public class TransformActions implements Serializable {
final String targetBaseDir = getTargetBaseDir(isLookupUrl); final String targetBaseDir = getTargetBaseDir(isLookupUrl);
try(SparkSession spark = getSparkSession(parser)) { try (SparkSession spark = getSparkSession(parser)) {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration()); final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
for(String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) { for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath)); LinkedList<String> pathQ =
Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
final String rawset = pathQ.pollLast(); final String rawset = pathQ.pollLast();
final String actionSetDirectory = pathQ.pollLast(); final String actionSetDirectory = pathQ.pollLast();
final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset); final Path targetDirectory =
new Path(
targetBaseDir
+ SEPARATOR
+ actionSetDirectory
+ SEPARATOR
+ rawset);
if (fs.exists(targetDirectory)) { if (fs.exists(targetDirectory)) {
log.info(String.format("found target directory '%s", targetDirectory)); log.info(String.format("found target directory '%s", targetDirectory));
@ -79,20 +84,27 @@ public class TransformActions implements Serializable {
log.info(String.format("deleted target directory '%s", targetDirectory)); log.info(String.format("deleted target directory '%s", targetDirectory));
} }
log.info(String.format("transforming actions from '%s' to '%s'", sourcePath, targetDirectory)); log.info(
String.format(
"transforming actions from '%s' to '%s'",
sourcePath, targetDirectory));
sc.sequenceFile(sourcePath, Text.class, Text.class) sc.sequenceFile(sourcePath, Text.class, Text.class)
.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString())) .map(
.map(a -> doTransform(a)) a ->
.filter(Objects::isNull) eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(
.filter(a -> a.getPayload() == null) a._2().toString()))
.map(a -> new ObjectMapper().writeValueAsString(a)) .map(a -> doTransform(a))
.saveAsTextFile(targetDirectory.toString(), GzipCodec.class); .filter(Objects::isNull)
.filter(a -> a.getPayload() == null)
.map(a -> new ObjectMapper().writeValueAsString(a))
.saveAsTextFile(targetDirectory.toString(), GzipCodec.class);
} }
} }
} }
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException { private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa)
throws InvalidProtocolBufferException, JsonProcessingException {
final Text out = new Text(); final Text out = new Text();
final ObjectMapper mapper = new ObjectMapper(); final ObjectMapper mapper = new ObjectMapper();
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) { if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
@ -135,7 +147,8 @@ public class TransformActions implements Serializable {
return new AtomicAction<>(Relation.class, rel); return new AtomicAction<>(Relation.class, rel);
} }
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException { private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
throws InvalidProtocolBufferException {
final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue()); final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
final Oaf oaf = ProtoConverter.convert(proto_oaf); final Oaf oaf = ProtoConverter.convert(proto_oaf);
switch (proto_oaf.getKind()) { switch (proto_oaf.getKind()) {
@ -148,14 +161,21 @@ public class TransformActions implements Serializable {
case project: case project:
return new AtomicAction<>(Project.class, (Project) oaf); return new AtomicAction<>(Project.class, (Project) oaf);
case result: case result:
final String resulttypeid = proto_oaf.getEntity().getResult().getMetadata().getResulttype().getClassid(); final String resulttypeid =
proto_oaf
.getEntity()
.getResult()
.getMetadata()
.getResulttype()
.getClassid();
switch (resulttypeid) { switch (resulttypeid) {
case "publication": case "publication":
return new AtomicAction<>(Publication.class, (Publication) oaf); return new AtomicAction<>(Publication.class, (Publication) oaf);
case "software": case "software":
return new AtomicAction<>(Software.class, (Software) oaf); return new AtomicAction<>(Software.class, (Software) oaf);
case "other": case "other":
return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf); return new AtomicAction<>(
OtherResearchProduct.class, (OtherResearchProduct) oaf);
case "dataset": case "dataset":
return new AtomicAction<>(Dataset.class, (Dataset) oaf); return new AtomicAction<>(Dataset.class, (Dataset) oaf);
default: default:
@ -163,7 +183,8 @@ public class TransformActions implements Serializable {
return new AtomicAction<>(Result.class, (Result) oaf); return new AtomicAction<>(Result.class, (Result) oaf);
} }
default: default:
throw new IllegalArgumentException("invalid entity type: " + proto_oaf.getEntity().getType()); throw new IllegalArgumentException(
"invalid entity type: " + proto_oaf.getEntity().getType());
} }
case relation: case relation:
return new AtomicAction<>(Relation.class, (Relation) oaf); return new AtomicAction<>(Relation.class, (Relation) oaf);
@ -174,15 +195,15 @@ public class TransformActions implements Serializable {
private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException { private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()"; String XQUERY =
"collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
return isLookUp.getResourceProfileByQuery(XQUERY); return isLookUp.getResourceProfileByQuery(XQUERY);
} }
private static SparkSession getSparkSession(ArgumentApplicationParser parser) { private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
return SparkSession return SparkSession.builder()
.builder()
.appName(TransformActions.class.getSimpleName()) .appName(TransformActions.class.getSimpleName())
.master(parser.get("master")) .master(parser.get("master"))
.config(conf) .config(conf)

Some files were not shown because too many files have changed in this diff Show More