forked from D-Net/dnet-hadoop
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
454b8a6a29
|
@ -3,7 +3,6 @@ package eu.dnetlib.maven.plugin.properties;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.maven.plugin.AbstractMojo;
|
import org.apache.maven.plugin.AbstractMojo;
|
||||||
|
@ -12,8 +11,8 @@ import org.apache.maven.plugin.MojoFailureException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates oozie properties which were not provided from commandline.
|
* Generates oozie properties which were not provided from commandline.
|
||||||
* @author mhorst
|
|
||||||
*
|
*
|
||||||
|
* @author mhorst
|
||||||
* @goal generate-properties
|
* @goal generate-properties
|
||||||
*/
|
*/
|
||||||
public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
|
@ -25,22 +24,25 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void execute() throws MojoExecutionException, MojoFailureException {
|
public void execute() throws MojoExecutionException, MojoFailureException {
|
||||||
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) &&
|
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
||||||
!System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
||||||
String generatedSandboxName = generateSandboxName(System.getProperties().getProperty(
|
String generatedSandboxName =
|
||||||
PROPERTY_NAME_WF_SOURCE_DIR));
|
generateSandboxName(
|
||||||
if (generatedSandboxName!=null) {
|
|
||||||
System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME,
|
|
||||||
generatedSandboxName);
|
|
||||||
} else {
|
|
||||||
System.out.println("unable to generate sandbox name from path: " +
|
|
||||||
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
|
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
|
||||||
|
if (generatedSandboxName != null) {
|
||||||
|
System.getProperties()
|
||||||
|
.setProperty(PROPERTY_NAME_SANDBOX_NAME, generatedSandboxName);
|
||||||
|
} else {
|
||||||
|
System.out.println(
|
||||||
|
"unable to generate sandbox name from path: "
|
||||||
|
+ System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates sandbox name from workflow source directory.
|
* Generates sandbox name from workflow source directory.
|
||||||
|
*
|
||||||
* @param wfSourceDir
|
* @param wfSourceDir
|
||||||
* @return generated sandbox name
|
* @return generated sandbox name
|
||||||
*/
|
*/
|
||||||
|
@ -53,8 +55,9 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
for (String limiter : limiters) {
|
for (String limiter : limiters) {
|
||||||
if (limiter.equals(token)) {
|
if (limiter.equals(token)) {
|
||||||
return sandboxNameParts.size()>0?
|
return sandboxNameParts.size() > 0
|
||||||
StringUtils.join(sandboxNameParts.toArray()):null;
|
? StringUtils.join(sandboxNameParts.toArray())
|
||||||
|
: null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sandboxNameParts.size() > 0) {
|
if (sandboxNameParts.size() > 0) {
|
||||||
|
@ -67,5 +70,4 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,19 +1,17 @@
|
||||||
/**
|
/**
|
||||||
|
* Licensed under the Educational Community License, Version 2.0 (the "License"); you may not use
|
||||||
|
* this file except in compliance with the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* Licensed under the Educational Community License, Version 2.0 (the "License");
|
* <p>http://www.opensource.org/licenses/ecl2.php
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
*
|
||||||
* http://www.opensource.org/licenses/ecl2.php
|
* <p>Unless required by applicable law or agreed to in writing, software distributed under the
|
||||||
*
|
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* express or implied. See the License for the specific language governing permissions and
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package eu.dnetlib.maven.plugin.properties;
|
package eu.dnetlib.maven.plugin.properties;
|
||||||
|
|
||||||
|
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -26,7 +24,6 @@ import java.util.List;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
@ -38,13 +35,10 @@ import org.springframework.core.io.DefaultResourceLoader;
|
||||||
import org.springframework.core.io.Resource;
|
import org.springframework.core.io.Resource;
|
||||||
import org.springframework.core.io.ResourceLoader;
|
import org.springframework.core.io.ResourceLoader;
|
||||||
|
|
||||||
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes project properties for the keys listed in specified properties files.
|
* Writes project properties for the keys listed in specified properties files. Based on:
|
||||||
* Based on:
|
|
||||||
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
|
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
|
||||||
|
*
|
||||||
* @author mhorst
|
* @author mhorst
|
||||||
* @goal write-project-properties
|
* @goal write-project-properties
|
||||||
*/
|
*/
|
||||||
|
@ -56,9 +50,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
protected static final String PROPERTY_PREFIX_ENV = "env.";
|
protected static final String PROPERTY_PREFIX_ENV = "env.";
|
||||||
private static final String ENCODING_UTF8 = "utf8";
|
private static final String ENCODING_UTF8 = "utf8";
|
||||||
|
|
||||||
/**
|
/** @parameter property="properties.includePropertyKeysFromFiles" */
|
||||||
* @parameter property="properties.includePropertyKeysFromFiles"
|
|
||||||
*/
|
|
||||||
private String[] includePropertyKeysFromFiles;
|
private String[] includePropertyKeysFromFiles;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -78,31 +70,33 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
protected File outputFile;
|
protected File outputFile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, the plugin will silently ignore any non-existent properties files, and the build will continue
|
* If true, the plugin will silently ignore any non-existent properties files, and the build
|
||||||
|
* will continue
|
||||||
*
|
*
|
||||||
* @parameter property="properties.quiet" default-value="true"
|
* @parameter property="properties.quiet" default-value="true"
|
||||||
*/
|
*/
|
||||||
private boolean quiet;
|
private boolean quiet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed,
|
* Comma separated list of characters to escape when writing property values. cr=carriage
|
||||||
* tab=tab. Any other values are taken literally.
|
* return, lf=linefeed, tab=tab. Any other values are taken literally.
|
||||||
*
|
*
|
||||||
* @parameter default-value="cr,lf,tab" property="properties.escapeChars"
|
* @parameter default-value="cr,lf,tab" property="properties.escapeChars"
|
||||||
*/
|
*/
|
||||||
private String escapeChars;
|
private String escapeChars;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, the plugin will include system properties when writing the properties file. System properties override
|
* If true, the plugin will include system properties when writing the properties file. System
|
||||||
* both environment variables and project properties.
|
* properties override both environment variables and project properties.
|
||||||
*
|
*
|
||||||
* @parameter default-value="false" property="properties.includeSystemProperties"
|
* @parameter default-value="false" property="properties.includeSystemProperties"
|
||||||
*/
|
*/
|
||||||
private boolean includeSystemProperties;
|
private boolean includeSystemProperties;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, the plugin will include environment variables when writing the properties file. Environment variables
|
* If true, the plugin will include environment variables when writing the properties file.
|
||||||
* are prefixed with "env". Environment variables override project properties.
|
* Environment variables are prefixed with "env". Environment variables override project
|
||||||
|
* properties.
|
||||||
*
|
*
|
||||||
* @parameter default-value="false" property="properties.includeEnvironmentVariables"
|
* @parameter default-value="false" property="properties.includeEnvironmentVariables"
|
||||||
*/
|
*/
|
||||||
|
@ -116,8 +110,8 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
private String exclude;
|
private String exclude;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Comma separated set of properties to write to the properties file. If provided, only the properties matching
|
* Comma separated set of properties to write to the properties file. If provided, only the
|
||||||
* those supplied here will be written to the properties file.
|
* properties matching those supplied here will be written to the properties file.
|
||||||
*
|
*
|
||||||
* @parameter property="properties.include"
|
* @parameter property="properties.include"
|
||||||
*/
|
*/
|
||||||
|
@ -153,6 +147,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides environment variables.
|
* Provides environment variables.
|
||||||
|
*
|
||||||
* @return environment variables
|
* @return environment variables
|
||||||
*/
|
*/
|
||||||
protected static Properties getEnvironmentVariables() {
|
protected static Properties getEnvironmentVariables() {
|
||||||
|
@ -165,12 +160,14 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes properties which should not be written.
|
* Removes properties which should not be written.
|
||||||
|
*
|
||||||
* @param properties
|
* @param properties
|
||||||
* @param omitCSV
|
* @param omitCSV
|
||||||
* @param includeCSV
|
* @param includeCSV
|
||||||
* @throws MojoExecutionException
|
* @throws MojoExecutionException
|
||||||
*/
|
*/
|
||||||
protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException {
|
protected void trim(Properties properties, String omitCSV, String includeCSV)
|
||||||
|
throws MojoExecutionException {
|
||||||
List<String> omitKeys = getListFromCSV(omitCSV);
|
List<String> omitKeys = getListFromCSV(omitCSV);
|
||||||
for (String key : omitKeys) {
|
for (String key : omitKeys) {
|
||||||
properties.remove(key);
|
properties.remove(key);
|
||||||
|
@ -201,6 +198,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether file exists.
|
* Checks whether file exists.
|
||||||
|
*
|
||||||
* @param location
|
* @param location
|
||||||
* @return true when exists, false otherwise.
|
* @return true when exists, false otherwise.
|
||||||
*/
|
*/
|
||||||
|
@ -219,6 +217,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates resource location.
|
* Validates resource location.
|
||||||
|
*
|
||||||
* @param location
|
* @param location
|
||||||
* @return true when valid, false otherwise
|
* @return true when valid, false otherwise
|
||||||
* @throws MojoExecutionException
|
* @throws MojoExecutionException
|
||||||
|
@ -238,6 +237,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides input stream.
|
* Provides input stream.
|
||||||
|
*
|
||||||
* @param location
|
* @param location
|
||||||
* @return input stream
|
* @return input stream
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
|
@ -254,6 +254,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates properties for given location.
|
* Creates properties for given location.
|
||||||
|
*
|
||||||
* @param location
|
* @param location
|
||||||
* @return properties for given location
|
* @return properties for given location
|
||||||
* @throws MojoExecutionException
|
* @throws MojoExecutionException
|
||||||
|
@ -278,6 +279,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides escape characters.
|
* Provides escape characters.
|
||||||
|
*
|
||||||
* @param escapeChars
|
* @param escapeChars
|
||||||
* @return escape characters
|
* @return escape characters
|
||||||
*/
|
*/
|
||||||
|
@ -293,6 +295,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides real token.
|
* Provides real token.
|
||||||
|
*
|
||||||
* @param token
|
* @param token
|
||||||
* @return real token
|
* @return real token
|
||||||
*/
|
*/
|
||||||
|
@ -310,6 +313,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns content.
|
* Returns content.
|
||||||
|
*
|
||||||
* @param comment
|
* @param comment
|
||||||
* @param properties
|
* @param properties
|
||||||
* @param escapeTokens
|
* @param escapeTokens
|
||||||
|
@ -332,13 +336,15 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes properties to given file.
|
* Writes properties to given file.
|
||||||
|
*
|
||||||
* @param file
|
* @param file
|
||||||
* @param comment
|
* @param comment
|
||||||
* @param properties
|
* @param properties
|
||||||
* @param escapeTokens
|
* @param escapeTokens
|
||||||
* @throws MojoExecutionException
|
* @throws MojoExecutionException
|
||||||
*/
|
*/
|
||||||
protected void writeProperties(File file, String comment, Properties properties, List<String> escapeTokens)
|
protected void writeProperties(
|
||||||
|
File file, String comment, Properties properties, List<String> escapeTokens)
|
||||||
throws MojoExecutionException {
|
throws MojoExecutionException {
|
||||||
try {
|
try {
|
||||||
String content = getContent(comment, properties, escapeTokens);
|
String content = getContent(comment, properties, escapeTokens);
|
||||||
|
@ -350,6 +356,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Escapes characters.
|
* Escapes characters.
|
||||||
|
*
|
||||||
* @param s
|
* @param s
|
||||||
* @param escapeChars
|
* @param escapeChars
|
||||||
* @return
|
* @return
|
||||||
|
@ -364,6 +371,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides replacement token.
|
* Provides replacement token.
|
||||||
|
*
|
||||||
* @param escapeChar
|
* @param escapeChar
|
||||||
* @return replacement token
|
* @return replacement token
|
||||||
*/
|
*/
|
||||||
|
@ -381,6 +389,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns list from csv.
|
* Returns list from csv.
|
||||||
|
*
|
||||||
* @param csv
|
* @param csv
|
||||||
* @return list of values generated from CSV
|
* @return list of values generated from CSV
|
||||||
*/
|
*/
|
||||||
|
@ -422,15 +431,14 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets property files for which keys properties should be included.
|
* Sets property files for which keys properties should be included.
|
||||||
|
*
|
||||||
* @param includePropertyKeysFromFiles
|
* @param includePropertyKeysFromFiles
|
||||||
*/
|
*/
|
||||||
public void setIncludePropertyKeysFromFiles(
|
public void setIncludePropertyKeysFromFiles(String[] includePropertyKeysFromFiles) {
|
||||||
String[] includePropertyKeysFromFiles) {
|
|
||||||
if (includePropertyKeysFromFiles != null) {
|
if (includePropertyKeysFromFiles != null) {
|
||||||
this.includePropertyKeysFromFiles = Arrays.copyOf(
|
this.includePropertyKeysFromFiles =
|
||||||
includePropertyKeysFromFiles,
|
Arrays.copyOf(
|
||||||
includePropertyKeysFromFiles.length);
|
includePropertyKeysFromFiles, includePropertyKeysFromFiles.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,16 +1,13 @@
|
||||||
package eu.dnetlib.maven.plugin.properties;
|
package eu.dnetlib.maven.plugin.properties;
|
||||||
|
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
|
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
|
||||||
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
|
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
/**
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
* @author mhorst, claudio.atzori
|
import org.junit.jupiter.api.Test;
|
||||||
*
|
|
||||||
*/
|
/** @author mhorst, claudio.atzori */
|
||||||
public class GenerateOoziePropertiesMojoTest {
|
public class GenerateOoziePropertiesMojoTest {
|
||||||
|
|
||||||
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
||||||
|
@ -96,5 +93,4 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
// assert
|
// assert
|
||||||
assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
|
assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
package eu.dnetlib.maven.plugin.properties;
|
package eu.dnetlib.maven.plugin.properties;
|
||||||
|
|
||||||
|
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
import static org.mockito.Mockito.doReturn;
|
||||||
|
import static org.mockito.Mockito.lenient;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.Properties;
|
||||||
import org.apache.maven.plugin.MojoExecutionException;
|
import org.apache.maven.plugin.MojoExecutionException;
|
||||||
import org.apache.maven.project.MavenProject;
|
import org.apache.maven.project.MavenProject;
|
||||||
import org.junit.jupiter.api.*;
|
import org.junit.jupiter.api.*;
|
||||||
|
@ -9,23 +16,11 @@ import org.mockito.Mock;
|
||||||
import org.mockito.MockitoAnnotations;
|
import org.mockito.MockitoAnnotations;
|
||||||
import org.mockito.junit.jupiter.MockitoExtension;
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
import java.io.*;
|
/** @author mhorst, claudio.atzori */
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
import static org.mockito.Mockito.doReturn;
|
|
||||||
import static org.mockito.Mockito.lenient;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author mhorst, claudio.atzori
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class WritePredefinedProjectPropertiesTest {
|
public class WritePredefinedProjectPropertiesTest {
|
||||||
|
|
||||||
@Mock
|
@Mock private MavenProject mavenProject;
|
||||||
private MavenProject mavenProject;
|
|
||||||
|
|
||||||
private WritePredefinedProjectProperties mojo;
|
private WritePredefinedProjectProperties mojo;
|
||||||
|
|
||||||
|
@ -86,7 +81,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -110,7 +106,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -134,7 +131,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -150,7 +148,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
includedProperties.setProperty(includedKey, "irrelevantValue");
|
includedProperties.setProperty(includedKey, "irrelevantValue");
|
||||||
includedProperties.store(new FileWriter(includedPropertiesFile), null);
|
includedProperties.store(new FileWriter(includedPropertiesFile), null);
|
||||||
|
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
|
mojo.setIncludePropertyKeysFromFiles(
|
||||||
|
new String[] {includedPropertiesFile.getAbsolutePath()});
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
@ -164,7 +163,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception {
|
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -175,7 +175,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
projectProperties.setProperty(includedKey, includedValue);
|
projectProperties.setProperty(includedKey, includedValue);
|
||||||
doReturn(projectProperties).when(mavenProject).getProperties();
|
doReturn(projectProperties).when(mavenProject).getProperties();
|
||||||
|
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
|
mojo.setIncludePropertyKeysFromFiles(
|
||||||
|
new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
@ -207,7 +208,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception {
|
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -223,7 +225,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
includedProperties.setProperty(includedKey, "irrelevantValue");
|
includedProperties.setProperty(includedKey, "irrelevantValue");
|
||||||
includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null);
|
includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null);
|
||||||
|
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
|
mojo.setIncludePropertyKeysFromFiles(
|
||||||
|
new String[] {includedPropertiesFile.getAbsolutePath()});
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
@ -237,7 +240,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception {
|
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -253,7 +257,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
includedProperties.setProperty(includedKey, "irrelevantValue");
|
includedProperties.setProperty(includedKey, "irrelevantValue");
|
||||||
includedProperties.store(new FileOutputStream(includedPropertiesFile), null);
|
includedProperties.store(new FileOutputStream(includedPropertiesFile), null);
|
||||||
|
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
|
mojo.setIncludePropertyKeysFromFiles(
|
||||||
|
new String[] {includedPropertiesFile.getAbsolutePath()});
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
|
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
|
||||||
|
@ -320,7 +325,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception {
|
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey ";
|
String key = "systemPropertyKey ";
|
||||||
String value = "systemPropertyValue";
|
String value = "systemPropertyValue";
|
||||||
|
@ -347,7 +353,8 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
return new File(testFolder, "test.properties");
|
return new File(testFolder, "test.properties");
|
||||||
}
|
}
|
||||||
|
|
||||||
private Properties getStoredProperties(File testFolder) throws FileNotFoundException, IOException {
|
private Properties getStoredProperties(File testFolder)
|
||||||
|
throws FileNotFoundException, IOException {
|
||||||
Properties properties = new Properties();
|
Properties properties = new Properties();
|
||||||
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
|
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
|
||||||
return properties;
|
return properties;
|
||||||
|
|
|
@ -3,7 +3,6 @@ package eu.dnetlib.collector.worker.model;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class ApiDescriptor {
|
public class ApiDescriptor {
|
||||||
|
|
||||||
private String id;
|
private String id;
|
||||||
|
@ -45,5 +44,4 @@ public class ApiDescriptor {
|
||||||
public void setProtocol(final String protocol) {
|
public void setProtocol(final String protocol) {
|
||||||
this.protocol = protocol;
|
this.protocol = protocol;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
import javax.persistence.Column;
|
import javax.persistence.Column;
|
||||||
import javax.persistence.Entity;
|
import javax.persistence.Entity;
|
||||||
import javax.persistence.Id;
|
import javax.persistence.Id;
|
||||||
|
@ -12,9 +11,7 @@ import javax.persistence.Table;
|
||||||
@Table(name = "mdstores")
|
@Table(name = "mdstores")
|
||||||
public class MDStore implements Serializable {
|
public class MDStore implements Serializable {
|
||||||
|
|
||||||
/**
|
/** */
|
||||||
*
|
|
||||||
*/
|
|
||||||
private static final long serialVersionUID = 3160530489149700055L;
|
private static final long serialVersionUID = 3160530489149700055L;
|
||||||
|
|
||||||
@Id
|
@Id
|
||||||
|
@ -95,11 +92,13 @@ public class MDStore implements Serializable {
|
||||||
this.apiId = apiId;
|
this.apiId = apiId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MDStore newInstance(final String format, final String layout, final String interpretation) {
|
public static MDStore newInstance(
|
||||||
|
final String format, final String layout, final String interpretation) {
|
||||||
return newInstance(format, layout, interpretation, null, null, null);
|
return newInstance(format, layout, interpretation, null, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static MDStore newInstance(final String format,
|
public static MDStore newInstance(
|
||||||
|
final String format,
|
||||||
final String layout,
|
final String layout,
|
||||||
final String interpretation,
|
final String interpretation,
|
||||||
final String dsName,
|
final String dsName,
|
||||||
|
@ -115,5 +114,4 @@ public class MDStore implements Serializable {
|
||||||
md.setApiId(apiId);
|
md.setApiId(apiId);
|
||||||
return md;
|
return md;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package eu.dnetlib.data.mdstore.manager.common.model;
|
package eu.dnetlib.data.mdstore.manager.common.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import javax.persistence.Column;
|
import javax.persistence.Column;
|
||||||
import javax.persistence.Entity;
|
import javax.persistence.Entity;
|
||||||
import javax.persistence.Id;
|
import javax.persistence.Id;
|
||||||
|
@ -11,9 +10,7 @@ import javax.persistence.Table;
|
||||||
@Table(name = "mdstore_current_versions")
|
@Table(name = "mdstore_current_versions")
|
||||||
public class MDStoreCurrentVersion implements Serializable {
|
public class MDStoreCurrentVersion implements Serializable {
|
||||||
|
|
||||||
/**
|
/** */
|
||||||
*
|
|
||||||
*/
|
|
||||||
private static final long serialVersionUID = -4757725888593745773L;
|
private static final long serialVersionUID = -4757725888593745773L;
|
||||||
|
|
||||||
@Id
|
@Id
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
import javax.persistence.Column;
|
import javax.persistence.Column;
|
||||||
import javax.persistence.Entity;
|
import javax.persistence.Entity;
|
||||||
import javax.persistence.Id;
|
import javax.persistence.Id;
|
||||||
|
@ -14,9 +13,7 @@ import javax.persistence.TemporalType;
|
||||||
@Table(name = "mdstore_versions")
|
@Table(name = "mdstore_versions")
|
||||||
public class MDStoreVersion implements Serializable {
|
public class MDStoreVersion implements Serializable {
|
||||||
|
|
||||||
/**
|
/** */
|
||||||
*
|
|
||||||
*/
|
|
||||||
private static final long serialVersionUID = -4763494442274298339L;
|
private static final long serialVersionUID = -4763494442274298339L;
|
||||||
|
|
||||||
@Id
|
@Id
|
||||||
|
@ -97,5 +94,4 @@ public class MDStoreVersion implements Serializable {
|
||||||
public void setSize(final long size) {
|
public void setSize(final long size) {
|
||||||
this.size = size;
|
this.size = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
import javax.persistence.Column;
|
import javax.persistence.Column;
|
||||||
import javax.persistence.Entity;
|
import javax.persistence.Entity;
|
||||||
import javax.persistence.Id;
|
import javax.persistence.Id;
|
||||||
|
@ -14,9 +13,7 @@ import javax.persistence.TemporalType;
|
||||||
@Table(name = "mdstores_with_info")
|
@Table(name = "mdstores_with_info")
|
||||||
public class MDStoreWithInfo implements Serializable {
|
public class MDStoreWithInfo implements Serializable {
|
||||||
|
|
||||||
/**
|
/** */
|
||||||
*
|
|
||||||
*/
|
|
||||||
private static final long serialVersionUID = -8445784770687571492L;
|
private static final long serialVersionUID = -8445784770687571492L;
|
||||||
|
|
||||||
@Id
|
@Id
|
||||||
|
@ -141,5 +138,4 @@ public class MDStoreWithInfo implements Serializable {
|
||||||
public void setNumberOfVersions(final long numberOfVersions) {
|
public void setNumberOfVersions(final long numberOfVersions) {
|
||||||
this.numberOfVersions = numberOfVersions;
|
this.numberOfVersions = numberOfVersions;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,6 @@
|
||||||
package eu.dnetlib.dhp.application;
|
package eu.dnetlib.dhp.application;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import org.apache.commons.cli.*;
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
@ -12,7 +8,9 @@ import java.io.StringWriter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
import java.util.zip.GZIPOutputStream;
|
import java.util.zip.GZIPOutputStream;
|
||||||
import java.util.zip.Inflater;
|
import org.apache.commons.cli.*;
|
||||||
|
import org.apache.commons.codec.binary.Base64;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
public class ArgumentApplicationParser implements Serializable {
|
public class ArgumentApplicationParser implements Serializable {
|
||||||
|
|
||||||
|
@ -23,7 +21,8 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
|
|
||||||
public ArgumentApplicationParser(final String json_configuration) throws Exception {
|
public ArgumentApplicationParser(final String json_configuration) throws Exception {
|
||||||
final ObjectMapper mapper = new ObjectMapper();
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
|
final OptionsParameter[] configuration =
|
||||||
|
mapper.readValue(json_configuration, OptionsParameter[].class);
|
||||||
createOptionMap(configuration);
|
createOptionMap(configuration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,23 +32,26 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
|
|
||||||
private void createOptionMap(final OptionsParameter[] configuration) {
|
private void createOptionMap(final OptionsParameter[] configuration) {
|
||||||
|
|
||||||
Arrays.stream(configuration).map(conf -> {
|
Arrays.stream(configuration)
|
||||||
final Option o = new Option(conf.getParamName(), true, conf.getParamDescription());
|
.map(
|
||||||
|
conf -> {
|
||||||
|
final Option o =
|
||||||
|
new Option(
|
||||||
|
conf.getParamName(), true, conf.getParamDescription());
|
||||||
o.setLongOpt(conf.getParamLongName());
|
o.setLongOpt(conf.getParamLongName());
|
||||||
o.setRequired(conf.isParamRequired());
|
o.setRequired(conf.isParamRequired());
|
||||||
if (conf.isCompressed()) {
|
if (conf.isCompressed()) {
|
||||||
compressedValues.add(conf.getParamLongName());
|
compressedValues.add(conf.getParamLongName());
|
||||||
}
|
}
|
||||||
return o;
|
return o;
|
||||||
}).forEach(options::addOption);
|
})
|
||||||
|
.forEach(options::addOption);
|
||||||
|
|
||||||
// HelpFormatter formatter = new HelpFormatter();
|
// HelpFormatter formatter = new HelpFormatter();
|
||||||
// formatter.printHelp("myapp", null, options, null, true);
|
// formatter.printHelp("myapp", null, options, null, true);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String decompressValue(final String abstractCompressed) {
|
public static String decompressValue(final String abstractCompressed) {
|
||||||
try {
|
try {
|
||||||
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
|
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
|
||||||
|
@ -74,7 +76,14 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
public void parseArgument(final String[] args) throws Exception {
|
public void parseArgument(final String[] args) throws Exception {
|
||||||
CommandLineParser parser = new BasicParser();
|
CommandLineParser parser = new BasicParser();
|
||||||
CommandLine cmd = parser.parse(options, args);
|
CommandLine cmd = parser.parse(options, args);
|
||||||
Arrays.stream(cmd.getOptions()).forEach(it -> objectMap.put(it.getLongOpt(), compressedValues.contains(it.getLongOpt())? decompressValue(it.getValue()): it.getValue()));
|
Arrays.stream(cmd.getOptions())
|
||||||
|
.forEach(
|
||||||
|
it ->
|
||||||
|
objectMap.put(
|
||||||
|
it.getLongOpt(),
|
||||||
|
compressedValues.contains(it.getLongOpt())
|
||||||
|
? decompressValue(it.getValue())
|
||||||
|
: it.getValue()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public String get(final String key) {
|
public String get(final String key) {
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
package eu.dnetlib.dhp.application;
|
package eu.dnetlib.dhp.application;
|
||||||
|
|
||||||
|
|
||||||
public class OptionsParameter {
|
public class OptionsParameter {
|
||||||
|
|
||||||
private String paramName;
|
private String paramName;
|
||||||
|
@ -9,8 +8,7 @@ public class OptionsParameter {
|
||||||
private boolean paramRequired;
|
private boolean paramRequired;
|
||||||
private boolean compressed;
|
private boolean compressed;
|
||||||
|
|
||||||
public OptionsParameter() {
|
public OptionsParameter() {}
|
||||||
}
|
|
||||||
|
|
||||||
public String getParamName() {
|
public String getParamName() {
|
||||||
return paramName;
|
return paramName;
|
||||||
|
|
|
@ -3,23 +3,19 @@ package eu.dnetlib.dhp.common;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
|
|
||||||
/**
|
/** Provides serializable and throwing extensions to standard functional interfaces. */
|
||||||
* Provides serializable and throwing extensions to standard functional interfaces.
|
|
||||||
*/
|
|
||||||
public class FunctionalInterfaceSupport {
|
public class FunctionalInterfaceSupport {
|
||||||
|
|
||||||
private FunctionalInterfaceSupport() {
|
private FunctionalInterfaceSupport() {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Serializable supplier of any kind of objects. To be used withing spark processing pipelines when supplying
|
* Serializable supplier of any kind of objects. To be used within spark processing pipelines
|
||||||
* functions externally.
|
* when supplying functions externally.
|
||||||
*
|
*
|
||||||
* @param <T>
|
* @param <T>
|
||||||
*/
|
*/
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {
|
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extension of consumer accepting functions throwing an exception.
|
* Extension of consumer accepting functions throwing an exception.
|
||||||
|
@ -52,5 +48,4 @@ public class FunctionalInterfaceSupport {
|
||||||
public interface ThrowingRunnable<E extends Exception> {
|
public interface ThrowingRunnable<E extends Exception> {
|
||||||
void run() throws E;
|
void run() throws E;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,5 +1,10 @@
|
||||||
package eu.dnetlib.dhp.common;
|
package eu.dnetlib.dhp.common;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
@ -7,19 +12,26 @@ import org.apache.hadoop.fs.Path;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.Arrays;
|
/** HDFS utility methods. */
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* HDFS utility methods.
|
|
||||||
*/
|
|
||||||
public class HdfsSupport {
|
public class HdfsSupport {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
|
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
|
||||||
|
|
||||||
private HdfsSupport() {
|
private HdfsSupport() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks a path (file or dir) exists on HDFS.
|
||||||
|
*
|
||||||
|
* @param path Path to be checked
|
||||||
|
* @param configuration Configuration of hadoop env
|
||||||
|
*/
|
||||||
|
public static boolean exists(String path, Configuration configuration) {
|
||||||
|
logger.info("Removing path: {}", path);
|
||||||
|
return rethrowAsRuntimeException(
|
||||||
|
() -> {
|
||||||
|
Path f = new Path(path);
|
||||||
|
FileSystem fileSystem = FileSystem.get(configuration);
|
||||||
|
return fileSystem.exists(f);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -30,7 +42,8 @@ public class HdfsSupport {
|
||||||
*/
|
*/
|
||||||
public static void remove(String path, Configuration configuration) {
|
public static void remove(String path, Configuration configuration) {
|
||||||
logger.info("Removing path: {}", path);
|
logger.info("Removing path: {}", path);
|
||||||
rethrowAsRuntimeException(() -> {
|
rethrowAsRuntimeException(
|
||||||
|
() -> {
|
||||||
Path f = new Path(path);
|
Path f = new Path(path);
|
||||||
FileSystem fileSystem = FileSystem.get(configuration);
|
FileSystem fileSystem = FileSystem.get(configuration);
|
||||||
if (fileSystem.exists(f)) {
|
if (fileSystem.exists(f)) {
|
||||||
|
@ -48,8 +61,9 @@ public class HdfsSupport {
|
||||||
*/
|
*/
|
||||||
public static List<String> listFiles(String path, Configuration configuration) {
|
public static List<String> listFiles(String path, Configuration configuration) {
|
||||||
logger.info("Listing files in path: {}", path);
|
logger.info("Listing files in path: {}", path);
|
||||||
return rethrowAsRuntimeException(() -> Arrays
|
return rethrowAsRuntimeException(
|
||||||
.stream(FileSystem.get(configuration).listStatus(new Path(path)))
|
() ->
|
||||||
|
Arrays.stream(FileSystem.get(configuration).listStatus(new Path(path)))
|
||||||
.filter(FileStatus::isDirectory)
|
.filter(FileStatus::isDirectory)
|
||||||
.map(x -> x.getPath().toString())
|
.map(x -> x.getPath().toString())
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
|
@ -1,58 +1,68 @@
|
||||||
package eu.dnetlib.dhp.common;
|
package eu.dnetlib.dhp.common;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.Function;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
import java.util.Objects;
|
/** SparkSession utility methods. */
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* SparkSession utility methods.
|
|
||||||
*/
|
|
||||||
public class SparkSessionSupport {
|
public class SparkSessionSupport {
|
||||||
|
|
||||||
private SparkSessionSupport() {
|
private SparkSessionSupport() {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs a given function using SparkSession created using default builder and supplied SparkConf. Stops SparkSession
|
* Runs a given function using SparkSession created using default builder and supplied
|
||||||
* when SparkSession is managed. Allows to reuse SparkSession created externally.
|
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
|
||||||
|
* created externally.
|
||||||
*
|
*
|
||||||
* @param conf SparkConf instance
|
* @param conf SparkConf instance
|
||||||
* @param isSparkSessionManaged When true will stop SparkSession
|
* @param isSparkSessionManaged When true will stop SparkSession
|
||||||
* @param fn Consumer to be applied to constructed SparkSession
|
* @param fn Consumer to be applied to constructed SparkSession
|
||||||
*/
|
*/
|
||||||
public static void runWithSparkSession(SparkConf conf,
|
public static void runWithSparkSession(
|
||||||
|
SparkConf conf,
|
||||||
Boolean isSparkSessionManaged,
|
Boolean isSparkSessionManaged,
|
||||||
ThrowingConsumer<SparkSession, Exception> fn) {
|
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||||
runWithSparkSession(c -> SparkSession.builder().config(c).getOrCreate(), conf, isSparkSessionManaged, fn);
|
runWithSparkSession(
|
||||||
|
c -> SparkSession.builder().config(c).getOrCreate(),
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs a given function using SparkSession created with hive support and using default builder and supplied SparkConf.
|
* Runs a given function using SparkSession created with hive support and using default builder
|
||||||
* Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession created externally.
|
* and supplied SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse
|
||||||
|
* SparkSession created externally.
|
||||||
*
|
*
|
||||||
* @param conf SparkConf instance
|
* @param conf SparkConf instance
|
||||||
* @param isSparkSessionManaged When true will stop SparkSession
|
* @param isSparkSessionManaged When true will stop SparkSession
|
||||||
* @param fn Consumer to be applied to constructed SparkSession
|
* @param fn Consumer to be applied to constructed SparkSession
|
||||||
*/
|
*/
|
||||||
public static void runWithSparkHiveSession(SparkConf conf,
|
public static void runWithSparkHiveSession(
|
||||||
|
SparkConf conf,
|
||||||
Boolean isSparkSessionManaged,
|
Boolean isSparkSessionManaged,
|
||||||
ThrowingConsumer<SparkSession, Exception> fn) {
|
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||||
runWithSparkSession(c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(), conf, isSparkSessionManaged, fn);
|
runWithSparkSession(
|
||||||
|
c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(),
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
|
fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs a given function using SparkSession created using supplied builder and supplied SparkConf. Stops SparkSession
|
* Runs a given function using SparkSession created using supplied builder and supplied
|
||||||
* when SparkSession is managed. Allows to reuse SparkSession created externally.
|
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
|
||||||
|
* created externally.
|
||||||
*
|
*
|
||||||
* @param sparkSessionBuilder Builder of SparkSession
|
* @param sparkSessionBuilder Builder of SparkSession
|
||||||
* @param conf SparkConf instance
|
* @param conf SparkConf instance
|
||||||
* @param isSparkSessionManaged When true will stop SparkSession
|
* @param isSparkSessionManaged When true will stop SparkSession
|
||||||
* @param fn Consumer to be applied to constructed SparkSession
|
* @param fn Consumer to be applied to constructed SparkSession
|
||||||
*/
|
*/
|
||||||
public static void runWithSparkSession(Function<SparkConf, SparkSession> sparkSessionBuilder,
|
public static void runWithSparkSession(
|
||||||
|
Function<SparkConf, SparkSession> sparkSessionBuilder,
|
||||||
SparkConf conf,
|
SparkConf conf,
|
||||||
Boolean isSparkSessionManaged,
|
Boolean isSparkSessionManaged,
|
||||||
ThrowingConsumer<SparkSession, Exception> fn) {
|
ThrowingConsumer<SparkSession, Exception> fn) {
|
||||||
|
|
|
@ -3,13 +3,10 @@ package eu.dnetlib.dhp.common;
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingRunnable;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingRunnable;
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingSupplier;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingSupplier;
|
||||||
|
|
||||||
/**
|
/** Exception handling utility methods. */
|
||||||
* Exception handling utility methods.
|
|
||||||
*/
|
|
||||||
public class ThrowingSupport {
|
public class ThrowingSupport {
|
||||||
|
|
||||||
private ThrowingSupport() {
|
private ThrowingSupport() {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Executes given runnable and rethrows any exceptions as RuntimeException.
|
* Executes given runnable and rethrows any exceptions as RuntimeException.
|
||||||
|
@ -32,7 +29,8 @@ public class ThrowingSupport {
|
||||||
* @param msg Message to be set for rethrown exception
|
* @param msg Message to be set for rethrown exception
|
||||||
* @param <E> Type of exception thrown
|
* @param <E> Type of exception thrown
|
||||||
*/
|
*/
|
||||||
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn, String msg) {
|
public static <E extends Exception> void rethrowAsRuntimeException(
|
||||||
|
ThrowingRunnable<E> fn, String msg) {
|
||||||
try {
|
try {
|
||||||
fn.run();
|
fn.run();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -65,12 +63,12 @@ public class ThrowingSupport {
|
||||||
* @param <E> Type of exception thrown
|
* @param <E> Type of exception thrown
|
||||||
* @return Result of supplier execution
|
* @return Result of supplier execution
|
||||||
*/
|
*/
|
||||||
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn, String msg) {
|
public static <T, E extends Exception> T rethrowAsRuntimeException(
|
||||||
|
ThrowingSupplier<T, E> fn, String msg) {
|
||||||
try {
|
try {
|
||||||
return fn.get();
|
return fn.get();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException(msg, e);
|
throw new RuntimeException(msg, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,59 +1,45 @@
|
||||||
package eu.dnetlib.dhp.model.mdstore;
|
package eu.dnetlib.dhp.model.mdstore;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/** This class models a record inside the new Metadata store collection on HDFS. */
|
||||||
/**
|
|
||||||
* This class models a record inside the new Metadata store collection on HDFS *
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class MetadataRecord implements Serializable {
|
public class MetadataRecord implements Serializable {
|
||||||
|
|
||||||
/**
|
/** The D-Net Identifier associated to the record */
|
||||||
* The D-Net Identifier associated to the record
|
|
||||||
*/
|
|
||||||
private String id;
|
private String id;
|
||||||
|
|
||||||
/**
|
/** The original Identifier of the record */
|
||||||
* The original Identifier of the record
|
|
||||||
*/
|
|
||||||
private String originalId;
|
private String originalId;
|
||||||
|
|
||||||
|
/** The encoding of the record, should be JSON or XML */
|
||||||
/**
|
|
||||||
* The encoding of the record, should be JSON or XML
|
|
||||||
*/
|
|
||||||
private String encoding;
|
private String encoding;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The information about the provenance of the record see @{@link Provenance}
|
* The information about the provenance of the record; see {@link Provenance} for the model of
|
||||||
* for the model of this information
|
* this information
|
||||||
*/
|
*/
|
||||||
private Provenance provenance;
|
private Provenance provenance;
|
||||||
|
|
||||||
/**
|
/** The content of the metadata */
|
||||||
* The content of the metadata
|
|
||||||
*/
|
|
||||||
private String body;
|
private String body;
|
||||||
|
|
||||||
/**
|
/** the date when the record has been stored */
|
||||||
* the date when the record has been stored
|
|
||||||
*/
|
|
||||||
private long dateOfCollection;
|
private long dateOfCollection;
|
||||||
|
|
||||||
/**
|
/** the date when the record has been transformed */
|
||||||
* the date when the record has been stored
|
|
||||||
*/
|
|
||||||
private long dateOfTransformation;
|
private long dateOfTransformation;
|
||||||
|
|
||||||
|
|
||||||
public MetadataRecord() {
|
public MetadataRecord() {
|
||||||
this.dateOfCollection = System.currentTimeMillis();
|
this.dateOfCollection = System.currentTimeMillis();
|
||||||
}
|
}
|
||||||
|
|
||||||
public MetadataRecord(String originalId, String encoding, Provenance provenance, String body, long dateOfCollection) {
|
public MetadataRecord(
|
||||||
|
String originalId,
|
||||||
|
String encoding,
|
||||||
|
Provenance provenance,
|
||||||
|
String body,
|
||||||
|
long dateOfCollection) {
|
||||||
|
|
||||||
this.originalId = originalId;
|
this.originalId = originalId;
|
||||||
this.encoding = encoding;
|
this.encoding = encoding;
|
||||||
|
@ -71,7 +57,6 @@ public class MetadataRecord implements Serializable {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String getOriginalId() {
|
public String getOriginalId() {
|
||||||
return originalId;
|
return originalId;
|
||||||
}
|
}
|
||||||
|
@ -96,7 +81,6 @@ public class MetadataRecord implements Serializable {
|
||||||
this.provenance = provenance;
|
this.provenance = provenance;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public String getBody() {
|
public String getBody() {
|
||||||
return body;
|
return body;
|
||||||
}
|
}
|
||||||
|
@ -127,7 +111,6 @@ public class MetadataRecord implements Serializable {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
|
return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -2,27 +2,20 @@ package eu.dnetlib.dhp.model.mdstore;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author Sandro La Bruzzo
|
* @author Sandro La Bruzzo
|
||||||
*
|
* <p>Provenance class models the provenance of the record in the metadataStore. It contains the
|
||||||
* Provenace class models the provenance of the record in the metadataStore
|
* identifier and the name of the datasource that gives the record
|
||||||
* It contains the identifier and the name of the datasource that gives the
|
|
||||||
* record
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class Provenance implements Serializable {
|
public class Provenance implements Serializable {
|
||||||
|
|
||||||
private String datasourceId;
|
private String datasourceId;
|
||||||
|
|
||||||
|
|
||||||
private String datasourceName;
|
private String datasourceName;
|
||||||
|
|
||||||
private String nsPrefix;
|
private String nsPrefix;
|
||||||
|
|
||||||
public Provenance() {
|
public Provenance() {}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
|
public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
|
||||||
this.datasourceId = datasourceId;
|
this.datasourceId = datasourceId;
|
||||||
|
|
|
@ -1,20 +1,17 @@
|
||||||
package eu.dnetlib.dhp.parser.utility;
|
package eu.dnetlib.dhp.parser.utility;
|
||||||
|
|
||||||
|
import com.ximpleware.AutoPilot;
|
||||||
|
import com.ximpleware.VTDNav;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/** Created by sandro on 9/29/16. */
|
||||||
import com.ximpleware.AutoPilot;
|
|
||||||
import com.ximpleware.VTDNav;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by sandro on 9/29/16.
|
|
||||||
*/
|
|
||||||
public class VtdUtilityParser {
|
public class VtdUtilityParser {
|
||||||
|
|
||||||
public static List<Node> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
public static List<Node> getTextValuesWithAttributes(
|
||||||
|
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
||||||
throws VtdException {
|
throws VtdException {
|
||||||
final List<Node> results = new ArrayList<>();
|
final List<Node> results = new ArrayList<>();
|
||||||
try {
|
try {
|
||||||
|
@ -35,11 +32,13 @@ public class VtdUtilityParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, String> getAttributes(final VTDNav vn, final List<String> attributes) {
|
private static Map<String, String> getAttributes(
|
||||||
|
final VTDNav vn, final List<String> attributes) {
|
||||||
final Map<String, String> currentAttributes = new HashMap<>();
|
final Map<String, String> currentAttributes = new HashMap<>();
|
||||||
if (attributes != null) {
|
if (attributes != null) {
|
||||||
|
|
||||||
attributes.forEach(attributeKey -> {
|
attributes.forEach(
|
||||||
|
attributeKey -> {
|
||||||
try {
|
try {
|
||||||
int attr = vn.getAttrVal(attributeKey);
|
int attr = vn.getAttrVal(attributeKey);
|
||||||
if (attr > -1) {
|
if (attr > -1) {
|
||||||
|
@ -53,7 +52,8 @@ public class VtdUtilityParser {
|
||||||
return currentAttributes;
|
return currentAttributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws VtdException {
|
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath)
|
||||||
|
throws VtdException {
|
||||||
List<String> results = new ArrayList<>();
|
List<String> results = new ArrayList<>();
|
||||||
try {
|
try {
|
||||||
ap.selectXPath(xpath);
|
ap.selectXPath(xpath);
|
||||||
|
@ -67,13 +67,13 @@ public class VtdUtilityParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws VtdException {
|
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath)
|
||||||
|
throws VtdException {
|
||||||
try {
|
try {
|
||||||
ap.selectXPath(xpath);
|
ap.selectXPath(xpath);
|
||||||
while (ap.evalXPath() != -1) {
|
while (ap.evalXPath() != -1) {
|
||||||
int it = nav.getText();
|
int it = nav.getText();
|
||||||
if (it > -1)
|
if (it > -1) return nav.toNormalizedString(it);
|
||||||
return nav.toNormalizedString(it);
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -103,5 +103,4 @@ public class VtdUtilityParser {
|
||||||
this.attributes = attributes;
|
this.attributes = attributes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,18 +1,16 @@
|
||||||
package eu.dnetlib.dhp.utils;
|
package eu.dnetlib.dhp.utils;
|
||||||
|
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
import net.minidev.json.JSONArray;
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
|
||||||
import org.apache.commons.codec.binary.Base64OutputStream;
|
|
||||||
import org.apache.commons.codec.binary.Hex;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.security.MessageDigest;
|
import java.security.MessageDigest;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
import java.util.zip.GZIPOutputStream;
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
import net.minidev.json.JSONArray;
|
||||||
|
import org.apache.commons.codec.binary.Base64;
|
||||||
|
import org.apache.commons.codec.binary.Base64OutputStream;
|
||||||
|
import org.apache.commons.codec.binary.Hex;
|
||||||
|
|
||||||
public class DHPUtils {
|
public class DHPUtils {
|
||||||
|
|
||||||
|
@ -32,7 +30,8 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressString(final String input) {
|
public static String compressString(final String input) {
|
||||||
try ( ByteArrayOutputStream out = new ByteArrayOutputStream(); Base64OutputStream b64os = new Base64OutputStream(out)) {
|
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
|
Base64OutputStream b64os = new Base64OutputStream(out)) {
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
|
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
|
||||||
gzip.write(input.getBytes(StandardCharsets.UTF_8));
|
gzip.write(input.getBytes(StandardCharsets.UTF_8));
|
||||||
gzip.close();
|
gzip.close();
|
||||||
|
@ -42,11 +41,11 @@ public class DHPUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static String decompressString(final String input) {
|
public static String decompressString(final String input) {
|
||||||
byte[] byteArray = Base64.decodeBase64(input.getBytes());
|
byte[] byteArray = Base64.decodeBase64(input.getBytes());
|
||||||
int len;
|
int len;
|
||||||
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
|
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
|
||||||
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
|
||||||
byte[] buffer = new byte[1024];
|
byte[] buffer = new byte[1024];
|
||||||
while ((len = gis.read(buffer)) != -1) {
|
while ((len = gis.read(buffer)) != -1) {
|
||||||
bos.write(buffer, 0, len);
|
bos.write(buffer, 0, len);
|
||||||
|
@ -55,14 +54,12 @@ public class DHPUtils {
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getJPathString(final String jsonPath, final String json) {
|
public static String getJPathString(final String jsonPath, final String json) {
|
||||||
try {
|
try {
|
||||||
Object o = JsonPath.read(json, jsonPath);
|
Object o = JsonPath.read(json, jsonPath);
|
||||||
if (o instanceof String)
|
if (o instanceof String) return (String) o;
|
||||||
return (String) o;
|
|
||||||
if (o instanceof JSONArray && ((JSONArray) o).size() > 0)
|
if (o instanceof JSONArray && ((JSONArray) o).size() > 0)
|
||||||
return (String) ((JSONArray) o).get(0);
|
return (String) ((JSONArray) o).get(0);
|
||||||
return o.toString();
|
return o.toString();
|
||||||
|
@ -70,5 +67,4 @@ public class DHPUtils {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,10 +9,13 @@ import net.sf.saxon.trans.XPathException;
|
||||||
|
|
||||||
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
||||||
|
|
||||||
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
public static String DEFAULT_SAXON_EXT_NS_URI =
|
||||||
|
"http://www.d-net.research-infrastructures.eu/saxon-extension";
|
||||||
|
|
||||||
public abstract String getName();
|
public abstract String getName();
|
||||||
public abstract Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException;
|
|
||||||
|
public abstract Sequence doCall(XPathContext context, Sequence[] arguments)
|
||||||
|
throws XPathException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StructuredQName getFunctionQName() {
|
public StructuredQName getFunctionQName() {
|
||||||
|
@ -28,5 +31,4 @@ public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinit
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,5 +1,9 @@
|
||||||
package eu.dnetlib.dhp.utils.saxon;
|
package eu.dnetlib.dhp.utils.saxon;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Calendar;
|
||||||
|
import java.util.GregorianCalendar;
|
||||||
import net.sf.saxon.expr.XPathContext;
|
import net.sf.saxon.expr.XPathContext;
|
||||||
import net.sf.saxon.om.Item;
|
import net.sf.saxon.om.Item;
|
||||||
import net.sf.saxon.om.Sequence;
|
import net.sf.saxon.om.Sequence;
|
||||||
|
@ -7,11 +11,6 @@ import net.sf.saxon.trans.XPathException;
|
||||||
import net.sf.saxon.value.SequenceType;
|
import net.sf.saxon.value.SequenceType;
|
||||||
import net.sf.saxon.value.StringValue;
|
import net.sf.saxon.value.StringValue;
|
||||||
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Calendar;
|
|
||||||
import java.util.GregorianCalendar;
|
|
||||||
|
|
||||||
public class ExtractYear extends AbstractExtensionFunction {
|
public class ExtractYear extends AbstractExtensionFunction {
|
||||||
|
|
||||||
private static final String[] dateFormats = {"yyyy-MM-dd", "yyyy/MM/dd"};
|
private static final String[] dateFormats = {"yyyy-MM-dd", "yyyy/MM/dd"};
|
||||||
|
@ -60,7 +59,8 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
c.setTime(new SimpleDateFormat(format).parse(s));
|
c.setTime(new SimpleDateFormat(format).parse(s));
|
||||||
String year = String.valueOf(c.get(Calendar.YEAR));
|
String year = String.valueOf(c.get(Calendar.YEAR));
|
||||||
return year;
|
return year;
|
||||||
} catch (ParseException e) {}
|
} catch (ParseException e) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,18 +1,19 @@
|
||||||
package eu.dnetlib.dhp.utils.saxon;
|
package eu.dnetlib.dhp.utils.saxon;
|
||||||
|
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Date;
|
||||||
import net.sf.saxon.expr.XPathContext;
|
import net.sf.saxon.expr.XPathContext;
|
||||||
import net.sf.saxon.om.Sequence;
|
import net.sf.saxon.om.Sequence;
|
||||||
import net.sf.saxon.trans.XPathException;
|
import net.sf.saxon.trans.XPathException;
|
||||||
import net.sf.saxon.value.SequenceType;
|
import net.sf.saxon.value.SequenceType;
|
||||||
import net.sf.saxon.value.StringValue;
|
import net.sf.saxon.value.StringValue;
|
||||||
|
|
||||||
import java.text.ParseException;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Date;
|
|
||||||
|
|
||||||
public class NormalizeDate extends AbstractExtensionFunction {
|
public class NormalizeDate extends AbstractExtensionFunction {
|
||||||
|
|
||||||
private static final String[] normalizeDateFormats = { "yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy" };
|
private static final String[] normalizeDateFormats = {
|
||||||
|
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
|
||||||
|
};
|
||||||
|
|
||||||
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
|
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
|
||||||
|
|
||||||
|
@ -58,9 +59,9 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
||||||
Date parse = new SimpleDateFormat(format).parse(date);
|
Date parse = new SimpleDateFormat(format).parse(date);
|
||||||
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
|
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
|
||||||
return res;
|
return res;
|
||||||
} catch (ParseException e) {}
|
} catch (ParseException e) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,8 @@ public class PickFirst extends AbstractExtensionFunction {
|
||||||
final String s1 = getValue(arguments[0]);
|
final String s1 = getValue(arguments[0]);
|
||||||
final String s2 = getValue(arguments[1]);
|
final String s2 = getValue(arguments[1]);
|
||||||
|
|
||||||
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
|
return new StringValue(
|
||||||
|
StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getValue(final Sequence arg) throws XPathException {
|
private String getValue(final Sequence arg) throws XPathException {
|
||||||
|
@ -56,5 +57,4 @@ public class PickFirst extends AbstractExtensionFunction {
|
||||||
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
|
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
|
||||||
return SequenceType.SINGLE_STRING;
|
return SequenceType.SINGLE_STRING;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,17 +1,17 @@
|
||||||
package eu.dnetlib.dhp.utils.saxon;
|
package eu.dnetlib.dhp.utils.saxon;
|
||||||
|
|
||||||
import net.sf.saxon.Configuration;
|
import java.io.StringReader;
|
||||||
import net.sf.saxon.TransformerFactoryImpl;
|
|
||||||
|
|
||||||
import javax.xml.transform.Transformer;
|
import javax.xml.transform.Transformer;
|
||||||
import javax.xml.transform.TransformerException;
|
import javax.xml.transform.TransformerException;
|
||||||
import javax.xml.transform.stream.StreamSource;
|
import javax.xml.transform.stream.StreamSource;
|
||||||
import java.io.StringReader;
|
import net.sf.saxon.Configuration;
|
||||||
|
import net.sf.saxon.TransformerFactoryImpl;
|
||||||
|
|
||||||
public class SaxonTransformerFactory {
|
public class SaxonTransformerFactory {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates the index record transformer from the given XSLT
|
* Creates the index record transformer from the given XSLT
|
||||||
|
*
|
||||||
* @param xslt
|
* @param xslt
|
||||||
* @return
|
* @return
|
||||||
* @throws TransformerException
|
* @throws TransformerException
|
||||||
|
@ -26,5 +26,4 @@ public class SaxonTransformerFactory {
|
||||||
|
|
||||||
return factory.newTransformer(new StreamSource(new StringReader(xslt)));
|
return factory.newTransformer(new StreamSource(new StringReader(xslt)));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.message;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -16,20 +15,12 @@ public class Message {
|
||||||
|
|
||||||
private Map<String, String> body;
|
private Map<String, String> body;
|
||||||
|
|
||||||
|
|
||||||
public static Message fromJson(final String json) throws IOException {
|
public static Message fromJson(final String json) throws IOException {
|
||||||
final ObjectMapper jsonMapper = new ObjectMapper();
|
final ObjectMapper jsonMapper = new ObjectMapper();
|
||||||
return jsonMapper.readValue(json, Message.class);
|
return jsonMapper.readValue(json, Message.class);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Message() {}
|
||||||
public Message() {
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
|
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
|
||||||
this.workflowId = workflowId;
|
this.workflowId = workflowId;
|
||||||
|
|
|
@ -4,7 +4,6 @@ import com.rabbitmq.client.AMQP;
|
||||||
import com.rabbitmq.client.Channel;
|
import com.rabbitmq.client.Channel;
|
||||||
import com.rabbitmq.client.DefaultConsumer;
|
import com.rabbitmq.client.DefaultConsumer;
|
||||||
import com.rabbitmq.client.Envelope;
|
import com.rabbitmq.client.Envelope;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
|
@ -13,7 +12,6 @@ public class MessageConsumer extends DefaultConsumer {
|
||||||
|
|
||||||
final LinkedBlockingQueue<Message> queueMessages;
|
final LinkedBlockingQueue<Message> queueMessages;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new instance and records its association to the passed-in channel.
|
* Constructs a new instance and records its association to the passed-in channel.
|
||||||
*
|
*
|
||||||
|
@ -25,9 +23,10 @@ public class MessageConsumer extends DefaultConsumer {
|
||||||
this.queueMessages = queueMessages;
|
this.queueMessages = queueMessages;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body) throws IOException {
|
public void handleDelivery(
|
||||||
|
String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
|
||||||
|
throws IOException {
|
||||||
final String json = new String(body, StandardCharsets.UTF_8);
|
final String json = new String(body, StandardCharsets.UTF_8);
|
||||||
Message message = Message.fromJson(json);
|
Message message = Message.fromJson(json);
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -3,8 +3,6 @@ package eu.dnetlib.message;
|
||||||
import com.rabbitmq.client.Channel;
|
import com.rabbitmq.client.Channel;
|
||||||
import com.rabbitmq.client.Connection;
|
import com.rabbitmq.client.Connection;
|
||||||
import com.rabbitmq.client.ConnectionFactory;
|
import com.rabbitmq.client.ConnectionFactory;
|
||||||
import sun.rmi.runtime.Log;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -27,17 +25,26 @@ public class MessageManager {
|
||||||
|
|
||||||
private boolean autodelete;
|
private boolean autodelete;
|
||||||
|
|
||||||
final private LinkedBlockingQueue<Message> queueMessages;
|
private final LinkedBlockingQueue<Message> queueMessages;
|
||||||
|
|
||||||
public MessageManager(String messageHost, String username, String password, final LinkedBlockingQueue<Message> queueMessages) {
|
public MessageManager(
|
||||||
|
String messageHost,
|
||||||
|
String username,
|
||||||
|
String password,
|
||||||
|
final LinkedBlockingQueue<Message> queueMessages) {
|
||||||
this.queueMessages = queueMessages;
|
this.queueMessages = queueMessages;
|
||||||
this.messageHost = messageHost;
|
this.messageHost = messageHost;
|
||||||
this.username = username;
|
this.username = username;
|
||||||
this.password = password;
|
this.password = password;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public MessageManager(
|
||||||
public MessageManager(String messageHost, String username, String password, boolean durable, boolean autodelete, final LinkedBlockingQueue<Message> queueMessages) {
|
String messageHost,
|
||||||
|
String username,
|
||||||
|
String password,
|
||||||
|
boolean durable,
|
||||||
|
boolean autodelete,
|
||||||
|
final LinkedBlockingQueue<Message> queueMessages) {
|
||||||
this.queueMessages = queueMessages;
|
this.queueMessages = queueMessages;
|
||||||
this.messageHost = messageHost;
|
this.messageHost = messageHost;
|
||||||
this.username = username;
|
this.username = username;
|
||||||
|
@ -55,7 +62,12 @@ public class MessageManager {
|
||||||
return factory.newConnection();
|
return factory.newConnection();
|
||||||
}
|
}
|
||||||
|
|
||||||
private Channel createChannel(final Connection connection, final String queueName, final boolean durable, final boolean autodelete ) throws Exception {
|
private Channel createChannel(
|
||||||
|
final Connection connection,
|
||||||
|
final String queueName,
|
||||||
|
final boolean durable,
|
||||||
|
final boolean autodelete)
|
||||||
|
throws Exception {
|
||||||
Map<String, Object> args = new HashMap<>();
|
Map<String, Object> args = new HashMap<>();
|
||||||
args.put("x-message-ttl", 10000);
|
args.put("x-message-ttl", 10000);
|
||||||
Channel channel = connection.createChannel();
|
Channel channel = connection.createChannel();
|
||||||
|
@ -63,7 +75,8 @@ public class MessageManager {
|
||||||
return channel;
|
return channel;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete) throws Exception {
|
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
|
||||||
|
throws Exception {
|
||||||
if (channels.containsKey(queueName)) {
|
if (channels.containsKey(queueName)) {
|
||||||
return channels.get(queueName);
|
return channels.get(queueName);
|
||||||
}
|
}
|
||||||
|
@ -75,10 +88,10 @@ public class MessageManager {
|
||||||
return channels.get(queueName);
|
return channels.get(queueName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
channels.values().forEach(ch-> {
|
channels.values()
|
||||||
|
.forEach(
|
||||||
|
ch -> {
|
||||||
try {
|
try {
|
||||||
ch.close();
|
ch.close();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -99,7 +112,9 @@ public class MessageManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean sendMessage(final Message message, String queueName, boolean durable_var, boolean autodelete_var) throws Exception {
|
public boolean sendMessage(
|
||||||
|
final Message message, String queueName, boolean durable_var, boolean autodelete_var)
|
||||||
|
throws Exception {
|
||||||
try {
|
try {
|
||||||
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
|
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
|
||||||
channel.basicPublish("", queueName, null, message.toString().getBytes());
|
channel.basicPublish("", queueName, null, message.toString().getBytes());
|
||||||
|
@ -109,7 +124,9 @@ public class MessageManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void startConsumingMessage(final String queueName, final boolean durable, final boolean autodelete) throws Exception{
|
public void startConsumingMessage(
|
||||||
|
final String queueName, final boolean durable, final boolean autodelete)
|
||||||
|
throws Exception {
|
||||||
|
|
||||||
Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
|
Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
|
||||||
channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
|
channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
package eu.dnetlib.message;
|
package eu.dnetlib.message;
|
||||||
|
|
||||||
public enum MessageType {
|
public enum MessageType {
|
||||||
|
|
||||||
ONGOING,
|
ONGOING,
|
||||||
REPORT
|
REPORT
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,19 +1,18 @@
|
||||||
package eu.dnetlib.scholexplorer.relation;
|
package eu.dnetlib.scholexplorer.relation;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
|
||||||
public class RelationMapper extends HashMap<String, RelInfo> implements Serializable {
|
public class RelationMapper extends HashMap<String, RelInfo> implements Serializable {
|
||||||
|
|
||||||
public static RelationMapper load() throws Exception {
|
public static RelationMapper load() throws Exception {
|
||||||
|
|
||||||
final String json = IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
|
final String json =
|
||||||
|
IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
|
||||||
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
return mapper.readValue(json, RelationMapper.class);
|
return mapper.readValue(json, RelationMapper.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,45 @@
|
||||||
package eu.dnetlib.dhp.application;
|
package eu.dnetlib.dhp.application;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class ArgumentApplicationParserTest {
|
public class ArgumentApplicationParserTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseParameter() throws Exception {
|
public void testParseParameter() throws Exception {
|
||||||
final String jsonConfiguration = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
final String jsonConfiguration =
|
||||||
|
IOUtils.toString(
|
||||||
|
this.getClass()
|
||||||
|
.getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
||||||
assertNotNull(jsonConfiguration);
|
assertNotNull(jsonConfiguration);
|
||||||
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(new String[]{"-p", "value0",
|
parser.parseArgument(
|
||||||
"-a", "value1",
|
new String[] {
|
||||||
"-n", "value2",
|
"-p",
|
||||||
"-u", "value3",
|
"value0",
|
||||||
"-ru", "value4",
|
"-a",
|
||||||
"-rp", "value5",
|
"value1",
|
||||||
"-rh", "value6",
|
"-n",
|
||||||
"-ro", "value7",
|
"value2",
|
||||||
"-rr", "value8",
|
"-u",
|
||||||
"-w", "value9",
|
"value3",
|
||||||
"-cc", ArgumentApplicationParser.compressArgument(jsonConfiguration)
|
"-ru",
|
||||||
|
"value4",
|
||||||
|
"-rp",
|
||||||
|
"value5",
|
||||||
|
"-rh",
|
||||||
|
"value6",
|
||||||
|
"-ro",
|
||||||
|
"value7",
|
||||||
|
"-rr",
|
||||||
|
"value8",
|
||||||
|
"-w",
|
||||||
|
"value9",
|
||||||
|
"-cc",
|
||||||
|
ArgumentApplicationParser.compressArgument(jsonConfiguration)
|
||||||
});
|
});
|
||||||
assertNotNull(parser.get("hdfsPath"));
|
assertNotNull(parser.get("hdfsPath"));
|
||||||
assertNotNull(parser.get("apidescriptor"));
|
assertNotNull(parser.get("apidescriptor"));
|
||||||
|
@ -47,10 +63,4 @@ public class ArgumentApplicationParserTest {
|
||||||
assertEquals("value9", parser.get("workflowId"));
|
assertEquals("value9", parser.get("workflowId"));
|
||||||
assertEquals(jsonConfiguration, parser.get("ccCoco"));
|
assertEquals(jsonConfiguration, parser.get("ccCoco"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,6 @@
|
||||||
package eu.dnetlib.dhp.common;
|
package eu.dnetlib.dhp.common;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
import org.junit.jupiter.api.Nested;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
import org.junit.jupiter.api.io.TempDir;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
|
@ -11,8 +8,10 @@ import java.nio.file.Path;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import org.junit.jupiter.api.Nested;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
public class HdfsSupportTest {
|
public class HdfsSupportTest {
|
||||||
|
|
||||||
|
@ -22,8 +21,8 @@ public class HdfsSupportTest {
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
public void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(
|
||||||
HdfsSupport.remove(null, new Configuration()));
|
RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -54,8 +53,8 @@ public class HdfsSupportTest {
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
public void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(
|
||||||
HdfsSupport.listFiles(null, new Configuration()));
|
RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -68,8 +67,10 @@ public class HdfsSupportTest {
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertEquals(2, paths.size());
|
assertEquals(2, paths.size());
|
||||||
List<String> expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()})
|
List<String> expecteds =
|
||||||
.sorted().collect(Collectors.toList());
|
Arrays.stream(new String[] {subDir1.toString(), subDir2.toString()})
|
||||||
|
.sorted()
|
||||||
|
.collect(Collectors.toList());
|
||||||
List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
|
List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
|
||||||
assertTrue(actuals.get(0).contains(expecteds.get(0)));
|
assertTrue(actuals.get(0).contains(expecteds.get(0)));
|
||||||
assertTrue(actuals.get(1).contains(expecteds.get(1)));
|
assertTrue(actuals.get(1).contains(expecteds.get(1)));
|
||||||
|
|
|
@ -1,22 +1,22 @@
|
||||||
package eu.dnetlib.dhp.common;
|
package eu.dnetlib.dhp.common;
|
||||||
|
|
||||||
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
|
||||||
|
import java.util.function.Function;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import org.junit.jupiter.api.Nested;
|
import org.junit.jupiter.api.Nested;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
import static org.mockito.Mockito.*;
|
|
||||||
|
|
||||||
public class SparkSessionSupportTest {
|
public class SparkSessionSupportTest {
|
||||||
|
|
||||||
@Nested
|
@Nested
|
||||||
class RunWithSparkSession {
|
class RunWithSparkSession {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception {
|
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
SparkConf conf = mock(SparkConf.class);
|
SparkConf conf = mock(SparkConf.class);
|
||||||
|
@ -34,7 +34,8 @@ public class SparkSessionSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception {
|
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
SparkConf conf = mock(SparkConf.class);
|
SparkConf conf = mock(SparkConf.class);
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
package eu.dnetlib.dhp.model.mdstore;
|
package eu.dnetlib.dhp.model.mdstore;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class MetadataRecordTest {
|
public class MetadataRecordTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
package eu.dnetlib.message;
|
package eu.dnetlib.message;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
public class MessageTest {
|
public class MessageTest {
|
||||||
|
|
||||||
|
@ -28,13 +27,16 @@ public class MessageTest {
|
||||||
assertEquals(m1.getJobName(), m.getJobName());
|
assertEquals(m1.getJobName(), m.getJobName());
|
||||||
|
|
||||||
assertNotNull(m1.getBody());
|
assertNotNull(m1.getBody());
|
||||||
m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
|
m1.getBody()
|
||||||
|
.keySet()
|
||||||
|
.forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
|
||||||
assertEquals(m1.getJobName(), m.getJobName());
|
assertEquals(m1.getJobName(), m.getJobName());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void toStringTest() {
|
public void toStringTest() {
|
||||||
final String expectedJson= "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
|
final String expectedJson =
|
||||||
|
"{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
|
||||||
Message m = new Message();
|
Message m = new Message();
|
||||||
m.setWorkflowId("wId");
|
m.setWorkflowId("wId");
|
||||||
m.setType(MessageType.ONGOING);
|
m.setType(MessageType.ONGOING);
|
||||||
|
@ -46,10 +48,5 @@ public class MessageTest {
|
||||||
m.setBody(body);
|
m.setBody(body);
|
||||||
|
|
||||||
assertEquals(expectedJson, m.toString());
|
assertEquals(expectedJson, m.toString());
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.scholexplorer.relation;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
|
||||||
public class RelationMapperTest {
|
public class RelationMapperTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -10,6 +9,5 @@ public class RelationMapperTest {
|
||||||
|
|
||||||
RelationMapper relationMapper = RelationMapper.load();
|
RelationMapper relationMapper = RelationMapper.load();
|
||||||
relationMapper.keySet().forEach(System.out::println);
|
relationMapper.keySet().forEach(System.out::println);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.action;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
@JsonDeserialize(using = AtomicActionDeserializer.class)
|
@JsonDeserialize(using = AtomicActionDeserializer.class)
|
||||||
|
@ -12,8 +11,7 @@ public class AtomicAction<T extends Oaf> implements Serializable {
|
||||||
|
|
||||||
private T payload;
|
private T payload;
|
||||||
|
|
||||||
public AtomicAction() {
|
public AtomicAction() {}
|
||||||
}
|
|
||||||
|
|
||||||
public AtomicAction(Class<T> clazz, T payload) {
|
public AtomicAction(Class<T> clazz, T payload) {
|
||||||
this.clazz = clazz;
|
this.clazz = clazz;
|
||||||
|
|
|
@ -7,13 +7,13 @@ import com.fasterxml.jackson.databind.JsonDeserializer;
|
||||||
import com.fasterxml.jackson.databind.JsonNode;
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public class AtomicActionDeserializer extends JsonDeserializer {
|
public class AtomicActionDeserializer extends JsonDeserializer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
|
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
|
||||||
|
throws IOException, JsonProcessingException {
|
||||||
JsonNode node = jp.getCodec().readTree(jp);
|
JsonNode node = jp.getCodec().readTree(jp);
|
||||||
String classTag = node.get("clazz").asText();
|
String classTag = node.get("clazz").asText();
|
||||||
JsonNode payload = node.get("payload");
|
JsonNode payload = node.get("payload");
|
||||||
|
|
|
@ -2,15 +2,19 @@ package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
|
|
||||||
/**
|
/** Actual entity types in the Graph */
|
||||||
* Actual entity types in the Graph
|
|
||||||
*/
|
|
||||||
public enum EntityType {
|
public enum EntityType {
|
||||||
|
publication,
|
||||||
publication, dataset, otherresearchproduct, software, datasource, organization, project;
|
dataset,
|
||||||
|
otherresearchproduct,
|
||||||
|
software,
|
||||||
|
datasource,
|
||||||
|
organization,
|
||||||
|
project;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Resolves the EntityType, given the relative class name
|
* Resolves the EntityType, given the relative class name
|
||||||
|
*
|
||||||
* @param clazz the given class name
|
* @param clazz the given class name
|
||||||
* @param <T> actual OafEntity subclass
|
* @param <T> actual OafEntity subclass
|
||||||
* @return the EntityType associated to the given class
|
* @return the EntityType associated to the given class
|
||||||
|
@ -19,5 +23,4 @@ public enum EntityType {
|
||||||
|
|
||||||
return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
|
return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
package eu.dnetlib.dhp.schema.common;
|
package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
/**
|
/** Main entity types in the Graph */
|
||||||
* Main entity types in the Graph
|
|
||||||
*/
|
|
||||||
public enum MainEntityType {
|
public enum MainEntityType {
|
||||||
|
result,
|
||||||
result, datasource, organization, project
|
datasource,
|
||||||
|
organization,
|
||||||
|
project
|
||||||
}
|
}
|
|
@ -2,17 +2,12 @@ package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/** Oaf model utility methods. */
|
||||||
* Oaf model utility methods.
|
|
||||||
*/
|
|
||||||
public class ModelSupport {
|
public class ModelSupport {
|
||||||
|
|
||||||
/**
|
/** Defines the mapping between the actual entity type and the main entity type */
|
||||||
* Defines the mapping between the actual entity type and the main entity type
|
|
||||||
*/
|
|
||||||
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
|
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
@ -26,9 +21,10 @@ public class ModelSupport {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines the mapping between the actual entity types and the relative classes implementing them
|
* Defines the mapping between the actual entity types and the relative classes implementing
|
||||||
|
* them
|
||||||
*/
|
*/
|
||||||
public final static Map<EntityType, Class> entityTypes = Maps.newHashMap();
|
public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
entityTypes.put(EntityType.datasource, Datasource.class);
|
entityTypes.put(EntityType.datasource, Datasource.class);
|
||||||
|
@ -40,7 +36,7 @@ public class ModelSupport {
|
||||||
entityTypes.put(EntityType.publication, Publication.class);
|
entityTypes.put(EntityType.publication, Publication.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static Map<String, Class> oafTypes = Maps.newHashMap();
|
public static final Map<String, Class> oafTypes = Maps.newHashMap();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
oafTypes.put("datasource", Datasource.class);
|
oafTypes.put("datasource", Datasource.class);
|
||||||
|
@ -55,8 +51,7 @@ public class ModelSupport {
|
||||||
|
|
||||||
private static final String schemeTemplate = "dnet:%s_%s_relations";
|
private static final String schemeTemplate = "dnet:%s_%s_relations";
|
||||||
|
|
||||||
private ModelSupport() {
|
private ModelSupport() {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks subclass-superclass relationship.
|
* Checks subclass-superclass relationship.
|
||||||
|
@ -67,7 +62,8 @@ public class ModelSupport {
|
||||||
* @param <Y> Superclass type
|
* @param <Y> Superclass type
|
||||||
* @return True if X is a subclass of Y
|
* @return True if X is a subclass of Y
|
||||||
*/
|
*/
|
||||||
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Y superClazzObject) {
|
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
|
||||||
|
X subClazzObject, Y superClazzObject) {
|
||||||
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
|
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -80,7 +76,8 @@ public class ModelSupport {
|
||||||
* @param <Y> Superclass type
|
* @param <Y> Superclass type
|
||||||
* @return True if X is a subclass of Y
|
* @return True if X is a subclass of Y
|
||||||
*/
|
*/
|
||||||
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Class<Y> superClazz) {
|
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
|
||||||
|
X subClazzObject, Class<Y> superClazz) {
|
||||||
return isSubClass(subClazzObject.getClass(), superClazz);
|
return isSubClass(subClazzObject.getClass(), superClazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,7 +90,8 @@ public class ModelSupport {
|
||||||
* @param <Y> Superclass type
|
* @param <Y> Superclass type
|
||||||
* @return True if X is a subclass of Y
|
* @return True if X is a subclass of Y
|
||||||
*/
|
*/
|
||||||
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(Class<X> subClazz, Class<Y> superClazz) {
|
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
|
||||||
|
Class<X> subClazz, Class<Y> superClazz) {
|
||||||
return superClazz.isAssignableFrom(subClazz);
|
return superClazz.isAssignableFrom(subClazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,9 +141,9 @@ public class ModelSupport {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getScheme(final String sourceType, final String targetType) {
|
public static String getScheme(final String sourceType, final String targetType) {
|
||||||
return String.format(schemeTemplate,
|
return String.format(
|
||||||
|
schemeTemplate,
|
||||||
entityMapping.get(EntityType.valueOf(sourceType)).name(),
|
entityMapping.get(EntityType.valueOf(sourceType)).name(),
|
||||||
entityMapping.get(EntityType.valueOf(targetType)).name());
|
entityMapping.get(EntityType.valueOf(targetType)).name());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -71,12 +71,12 @@ public class Author implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
Author author = (Author) o;
|
Author author = (Author) o;
|
||||||
return Objects.equals(fullname, author.fullname) &&
|
return Objects.equals(fullname, author.fullname)
|
||||||
Objects.equals(name, author.name) &&
|
&& Objects.equals(name, author.name)
|
||||||
Objects.equals(surname, author.surname) &&
|
&& Objects.equals(surname, author.surname)
|
||||||
Objects.equals(rank, author.rank) &&
|
&& Objects.equals(rank, author.rank)
|
||||||
Objects.equals(pid, author.pid) &&
|
&& Objects.equals(pid, author.pid)
|
||||||
Objects.equals(affiliation, author.affiliation);
|
&& Objects.equals(affiliation, author.affiliation);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -31,12 +31,9 @@ public class Context implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Context other = (Context) obj;
|
Context other = (Context) obj;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
public class DataInfo implements Serializable {
|
public class DataInfo implements Serializable {
|
||||||
|
@ -13,7 +12,6 @@ public class DataInfo implements Serializable {
|
||||||
private String inferenceprovenance;
|
private String inferenceprovenance;
|
||||||
private Qualifier provenanceaction;
|
private Qualifier provenanceaction;
|
||||||
|
|
||||||
|
|
||||||
public Boolean getInvisible() {
|
public Boolean getInvisible() {
|
||||||
return invisible;
|
return invisible;
|
||||||
}
|
}
|
||||||
|
@ -67,16 +65,22 @@ public class DataInfo implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
DataInfo dataInfo = (DataInfo) o;
|
DataInfo dataInfo = (DataInfo) o;
|
||||||
return Objects.equals(invisible, dataInfo.invisible) &&
|
return Objects.equals(invisible, dataInfo.invisible)
|
||||||
Objects.equals(inferred, dataInfo.inferred) &&
|
&& Objects.equals(inferred, dataInfo.inferred)
|
||||||
Objects.equals(deletedbyinference, dataInfo.deletedbyinference) &&
|
&& Objects.equals(deletedbyinference, dataInfo.deletedbyinference)
|
||||||
Objects.equals(trust, dataInfo.trust) &&
|
&& Objects.equals(trust, dataInfo.trust)
|
||||||
Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance) &&
|
&& Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance)
|
||||||
Objects.equals(provenanceaction, dataInfo.provenanceaction);
|
&& Objects.equals(provenanceaction, dataInfo.provenanceaction);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(invisible, inferred, deletedbyinference, trust, inferenceprovenance, provenanceaction);
|
return Objects.hash(
|
||||||
|
invisible,
|
||||||
|
inferred,
|
||||||
|
deletedbyinference,
|
||||||
|
trust,
|
||||||
|
inferenceprovenance,
|
||||||
|
provenanceaction);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,7 +86,10 @@ public class Dataset extends Result implements Serializable {
|
||||||
|
|
||||||
final Dataset d = (Dataset) e;
|
final Dataset d = (Dataset) e;
|
||||||
|
|
||||||
storagedate = d.getStoragedate() != null && compareTrust(this, e)<0? d.getStoragedate() : storagedate;
|
storagedate =
|
||||||
|
d.getStoragedate() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getStoragedate()
|
||||||
|
: storagedate;
|
||||||
|
|
||||||
device = d.getDevice() != null && compareTrust(this, e) < 0 ? d.getDevice() : device;
|
device = d.getDevice() != null && compareTrust(this, e) < 0 ? d.getDevice() : device;
|
||||||
|
|
||||||
|
@ -94,9 +97,15 @@ public class Dataset extends Result implements Serializable {
|
||||||
|
|
||||||
version = d.getVersion() != null && compareTrust(this, e) < 0 ? d.getVersion() : version;
|
version = d.getVersion() != null && compareTrust(this, e) < 0 ? d.getVersion() : version;
|
||||||
|
|
||||||
lastmetadataupdate= d.getLastmetadataupdate() != null && compareTrust(this, e)<0? d.getLastmetadataupdate() :lastmetadataupdate;
|
lastmetadataupdate =
|
||||||
|
d.getLastmetadataupdate() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getLastmetadataupdate()
|
||||||
|
: lastmetadataupdate;
|
||||||
|
|
||||||
metadataversionnumber= d.getMetadataversionnumber() != null && compareTrust(this, e)<0? d.getMetadataversionnumber() : metadataversionnumber;
|
metadataversionnumber =
|
||||||
|
d.getMetadataversionnumber() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getMetadataversionnumber()
|
||||||
|
: metadataversionnumber;
|
||||||
|
|
||||||
geolocation = mergeLists(geolocation, d.getGeolocation());
|
geolocation = mergeLists(geolocation, d.getGeolocation());
|
||||||
|
|
||||||
|
@ -109,17 +118,25 @@ public class Dataset extends Result implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
Dataset dataset = (Dataset) o;
|
Dataset dataset = (Dataset) o;
|
||||||
return Objects.equals(storagedate, dataset.storagedate) &&
|
return Objects.equals(storagedate, dataset.storagedate)
|
||||||
Objects.equals(device, dataset.device) &&
|
&& Objects.equals(device, dataset.device)
|
||||||
Objects.equals(size, dataset.size) &&
|
&& Objects.equals(size, dataset.size)
|
||||||
Objects.equals(version, dataset.version) &&
|
&& Objects.equals(version, dataset.version)
|
||||||
Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate) &&
|
&& Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate)
|
||||||
Objects.equals(metadataversionnumber, dataset.metadataversionnumber) &&
|
&& Objects.equals(metadataversionnumber, dataset.metadataversionnumber)
|
||||||
Objects.equals(geolocation, dataset.geolocation);
|
&& Objects.equals(geolocation, dataset.geolocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), storagedate, device, size, version, lastmetadataupdate, metadataversionnumber, geolocation);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
storagedate,
|
||||||
|
device,
|
||||||
|
size,
|
||||||
|
version,
|
||||||
|
lastmetadataupdate,
|
||||||
|
metadataversionnumber,
|
||||||
|
geolocation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -373,55 +373,138 @@ public class Datasource extends OafEntity implements Serializable {
|
||||||
|
|
||||||
Datasource d = (Datasource) e;
|
Datasource d = (Datasource) e;
|
||||||
|
|
||||||
datasourcetype = d.getDatasourcetype() != null && compareTrust(this, e)<0? d.getDatasourcetype() : datasourcetype;
|
datasourcetype =
|
||||||
openairecompatibility = d.getOpenairecompatibility() != null && compareTrust(this, e)<0? d.getOpenairecompatibility() : openairecompatibility;
|
d.getDatasourcetype() != null && compareTrust(this, e) < 0
|
||||||
officialname = d.getOfficialname() != null && compareTrust(this, e)<0? d.getOfficialname() : officialname;
|
? d.getDatasourcetype()
|
||||||
englishname = d.getEnglishname() != null && compareTrust(this, e)<0? d.getEnglishname() : officialname;
|
: datasourcetype;
|
||||||
websiteurl = d.getWebsiteurl() != null && compareTrust(this, e)<0? d.getWebsiteurl() : websiteurl;
|
openairecompatibility =
|
||||||
logourl = d.getLogourl() != null && compareTrust(this, e)<0? d.getLogourl() : getLogourl();
|
d.getOpenairecompatibility() != null && compareTrust(this, e) < 0
|
||||||
contactemail = d.getContactemail() != null && compareTrust(this, e)<0? d.getContactemail() : contactemail;
|
? d.getOpenairecompatibility()
|
||||||
namespaceprefix = d.getNamespaceprefix() != null && compareTrust(this, e)<0? d.getNamespaceprefix() : namespaceprefix;
|
: openairecompatibility;
|
||||||
latitude = d.getLatitude() != null && compareTrust(this, e)<0? d.getLatitude() : latitude;
|
officialname =
|
||||||
longitude = d.getLongitude() != null && compareTrust(this, e)<0? d.getLongitude() : longitude;
|
d.getOfficialname() != null && compareTrust(this, e) < 0
|
||||||
dateofvalidation = d.getDateofvalidation() != null && compareTrust(this, e)<0? d.getDateofvalidation() : dateofvalidation;
|
? d.getOfficialname()
|
||||||
description = d.getDescription() != null && compareTrust(this, e)<0? d.getDescription() : description;
|
: officialname;
|
||||||
|
englishname =
|
||||||
|
d.getEnglishname() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getEnglishname()
|
||||||
|
: officialname;
|
||||||
|
websiteurl =
|
||||||
|
d.getWebsiteurl() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getWebsiteurl()
|
||||||
|
: websiteurl;
|
||||||
|
logourl =
|
||||||
|
d.getLogourl() != null && compareTrust(this, e) < 0 ? d.getLogourl() : getLogourl();
|
||||||
|
contactemail =
|
||||||
|
d.getContactemail() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getContactemail()
|
||||||
|
: contactemail;
|
||||||
|
namespaceprefix =
|
||||||
|
d.getNamespaceprefix() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getNamespaceprefix()
|
||||||
|
: namespaceprefix;
|
||||||
|
latitude =
|
||||||
|
d.getLatitude() != null && compareTrust(this, e) < 0 ? d.getLatitude() : latitude;
|
||||||
|
longitude =
|
||||||
|
d.getLongitude() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getLongitude()
|
||||||
|
: longitude;
|
||||||
|
dateofvalidation =
|
||||||
|
d.getDateofvalidation() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDateofvalidation()
|
||||||
|
: dateofvalidation;
|
||||||
|
description =
|
||||||
|
d.getDescription() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDescription()
|
||||||
|
: description;
|
||||||
subjects = mergeLists(subjects, d.getSubjects());
|
subjects = mergeLists(subjects, d.getSubjects());
|
||||||
|
|
||||||
// opendoar specific fields (od*)
|
// opendoar specific fields (od*)
|
||||||
odnumberofitems = d.getOdnumberofitems() != null && compareTrust(this, e)<0? d.getOdnumberofitems() : odnumberofitems;
|
odnumberofitems =
|
||||||
odnumberofitemsdate = d.getOdnumberofitemsdate() != null && compareTrust(this, e)<0? d.getOdnumberofitemsdate() : odnumberofitemsdate;
|
d.getOdnumberofitems() != null && compareTrust(this, e) < 0
|
||||||
odpolicies = d.getOdpolicies() != null && compareTrust(this, e)<0? d.getOdpolicies() : odpolicies;
|
? d.getOdnumberofitems()
|
||||||
|
: odnumberofitems;
|
||||||
|
odnumberofitemsdate =
|
||||||
|
d.getOdnumberofitemsdate() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getOdnumberofitemsdate()
|
||||||
|
: odnumberofitemsdate;
|
||||||
|
odpolicies =
|
||||||
|
d.getOdpolicies() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getOdpolicies()
|
||||||
|
: odpolicies;
|
||||||
odlanguages = mergeLists(odlanguages, d.getOdlanguages());
|
odlanguages = mergeLists(odlanguages, d.getOdlanguages());
|
||||||
odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes());
|
odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes());
|
||||||
accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage());
|
accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage());
|
||||||
|
|
||||||
// re3data fields
|
// re3data fields
|
||||||
releasestartdate = d.getReleasestartdate() != null && compareTrust(this, e)<0? d.getReleasestartdate() : releasestartdate;
|
releasestartdate =
|
||||||
releaseenddate = d.getReleaseenddate() != null && compareTrust(this, e)<0? d.getReleaseenddate() : releaseenddate;
|
d.getReleasestartdate() != null && compareTrust(this, e) < 0
|
||||||
missionstatementurl = d.getMissionstatementurl() != null && compareTrust(this, e)<0? d.getMissionstatementurl() : missionstatementurl;
|
? d.getReleasestartdate()
|
||||||
dataprovider = d.getDataprovider() != null && compareTrust(this, e)<0? d.getDataprovider() : dataprovider;
|
: releasestartdate;
|
||||||
serviceprovider = d.getServiceprovider() != null && compareTrust(this, e)<0? d.getServiceprovider() : serviceprovider;
|
releaseenddate =
|
||||||
|
d.getReleaseenddate() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getReleaseenddate()
|
||||||
|
: releaseenddate;
|
||||||
|
missionstatementurl =
|
||||||
|
d.getMissionstatementurl() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getMissionstatementurl()
|
||||||
|
: missionstatementurl;
|
||||||
|
dataprovider =
|
||||||
|
d.getDataprovider() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDataprovider()
|
||||||
|
: dataprovider;
|
||||||
|
serviceprovider =
|
||||||
|
d.getServiceprovider() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getServiceprovider()
|
||||||
|
: serviceprovider;
|
||||||
|
|
||||||
// {open, restricted or closed}
|
// {open, restricted or closed}
|
||||||
databaseaccesstype = d.getDatabaseaccesstype() != null && compareTrust(this, e)<0? d.getDatabaseaccesstype() : databaseaccesstype;
|
databaseaccesstype =
|
||||||
|
d.getDatabaseaccesstype() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDatabaseaccesstype()
|
||||||
|
: databaseaccesstype;
|
||||||
|
|
||||||
// {open, restricted or closed}
|
// {open, restricted or closed}
|
||||||
datauploadtype = d.getDatauploadtype() != null && compareTrust(this, e)<0? d.getDatauploadtype() : datauploadtype;
|
datauploadtype =
|
||||||
|
d.getDatauploadtype() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDatauploadtype()
|
||||||
|
: datauploadtype;
|
||||||
|
|
||||||
// {feeRequired, registration, other}
|
// {feeRequired, registration, other}
|
||||||
databaseaccessrestriction = d.getDatabaseaccessrestriction() != null && compareTrust(this, e)<0? d.getDatabaseaccessrestriction() : databaseaccessrestriction;
|
databaseaccessrestriction =
|
||||||
|
d.getDatabaseaccessrestriction() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDatabaseaccessrestriction()
|
||||||
|
: databaseaccessrestriction;
|
||||||
|
|
||||||
// {feeRequired, registration, other}
|
// {feeRequired, registration, other}
|
||||||
datauploadrestriction = d.getDatauploadrestriction() != null && compareTrust(this, e)<0? d.getDatauploadrestriction() : datauploadrestriction;
|
datauploadrestriction =
|
||||||
|
d.getDatauploadrestriction() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getDatauploadrestriction()
|
||||||
|
: datauploadrestriction;
|
||||||
|
|
||||||
versioning = d.getVersioning() != null && compareTrust(this, e)<0? d.getVersioning() : versioning;
|
versioning =
|
||||||
citationguidelineurl = d.getCitationguidelineurl() != null && compareTrust(this, e)<0? d.getCitationguidelineurl() : citationguidelineurl;
|
d.getVersioning() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getVersioning()
|
||||||
|
: versioning;
|
||||||
|
citationguidelineurl =
|
||||||
|
d.getCitationguidelineurl() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getCitationguidelineurl()
|
||||||
|
: citationguidelineurl;
|
||||||
|
|
||||||
// {yes, no, unknown}
|
// {yes, no, unknown}
|
||||||
qualitymanagementkind = d.getQualitymanagementkind() != null && compareTrust(this, e)<0? d.getQualitymanagementkind() : qualitymanagementkind;
|
qualitymanagementkind =
|
||||||
pidsystems = d.getPidsystems() != null && compareTrust(this, e)<0? d.getPidsystems() : pidsystems;
|
d.getQualitymanagementkind() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getQualitymanagementkind()
|
||||||
|
: qualitymanagementkind;
|
||||||
|
pidsystems =
|
||||||
|
d.getPidsystems() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getPidsystems()
|
||||||
|
: pidsystems;
|
||||||
|
|
||||||
certificates = d.getCertificates() != null && compareTrust(this, e)<0? d.getCertificates() : certificates;
|
certificates =
|
||||||
|
d.getCertificates() != null && compareTrust(this, e) < 0
|
||||||
|
? d.getCertificates()
|
||||||
|
: certificates;
|
||||||
|
|
||||||
policies = mergeLists(policies, d.getPolicies());
|
policies = mergeLists(policies, d.getPolicies());
|
||||||
|
|
||||||
|
@ -436,45 +519,81 @@ public class Datasource extends OafEntity implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
Datasource that = (Datasource) o;
|
Datasource that = (Datasource) o;
|
||||||
return Objects.equals(datasourcetype, that.datasourcetype) &&
|
return Objects.equals(datasourcetype, that.datasourcetype)
|
||||||
Objects.equals(openairecompatibility, that.openairecompatibility) &&
|
&& Objects.equals(openairecompatibility, that.openairecompatibility)
|
||||||
Objects.equals(officialname, that.officialname) &&
|
&& Objects.equals(officialname, that.officialname)
|
||||||
Objects.equals(englishname, that.englishname) &&
|
&& Objects.equals(englishname, that.englishname)
|
||||||
Objects.equals(websiteurl, that.websiteurl) &&
|
&& Objects.equals(websiteurl, that.websiteurl)
|
||||||
Objects.equals(logourl, that.logourl) &&
|
&& Objects.equals(logourl, that.logourl)
|
||||||
Objects.equals(contactemail, that.contactemail) &&
|
&& Objects.equals(contactemail, that.contactemail)
|
||||||
Objects.equals(namespaceprefix, that.namespaceprefix) &&
|
&& Objects.equals(namespaceprefix, that.namespaceprefix)
|
||||||
Objects.equals(latitude, that.latitude) &&
|
&& Objects.equals(latitude, that.latitude)
|
||||||
Objects.equals(longitude, that.longitude) &&
|
&& Objects.equals(longitude, that.longitude)
|
||||||
Objects.equals(dateofvalidation, that.dateofvalidation) &&
|
&& Objects.equals(dateofvalidation, that.dateofvalidation)
|
||||||
Objects.equals(description, that.description) &&
|
&& Objects.equals(description, that.description)
|
||||||
Objects.equals(subjects, that.subjects) &&
|
&& Objects.equals(subjects, that.subjects)
|
||||||
Objects.equals(odnumberofitems, that.odnumberofitems) &&
|
&& Objects.equals(odnumberofitems, that.odnumberofitems)
|
||||||
Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate) &&
|
&& Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate)
|
||||||
Objects.equals(odpolicies, that.odpolicies) &&
|
&& Objects.equals(odpolicies, that.odpolicies)
|
||||||
Objects.equals(odlanguages, that.odlanguages) &&
|
&& Objects.equals(odlanguages, that.odlanguages)
|
||||||
Objects.equals(odcontenttypes, that.odcontenttypes) &&
|
&& Objects.equals(odcontenttypes, that.odcontenttypes)
|
||||||
Objects.equals(accessinfopackage, that.accessinfopackage) &&
|
&& Objects.equals(accessinfopackage, that.accessinfopackage)
|
||||||
Objects.equals(releasestartdate, that.releasestartdate) &&
|
&& Objects.equals(releasestartdate, that.releasestartdate)
|
||||||
Objects.equals(releaseenddate, that.releaseenddate) &&
|
&& Objects.equals(releaseenddate, that.releaseenddate)
|
||||||
Objects.equals(missionstatementurl, that.missionstatementurl) &&
|
&& Objects.equals(missionstatementurl, that.missionstatementurl)
|
||||||
Objects.equals(dataprovider, that.dataprovider) &&
|
&& Objects.equals(dataprovider, that.dataprovider)
|
||||||
Objects.equals(serviceprovider, that.serviceprovider) &&
|
&& Objects.equals(serviceprovider, that.serviceprovider)
|
||||||
Objects.equals(databaseaccesstype, that.databaseaccesstype) &&
|
&& Objects.equals(databaseaccesstype, that.databaseaccesstype)
|
||||||
Objects.equals(datauploadtype, that.datauploadtype) &&
|
&& Objects.equals(datauploadtype, that.datauploadtype)
|
||||||
Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction) &&
|
&& Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction)
|
||||||
Objects.equals(datauploadrestriction, that.datauploadrestriction) &&
|
&& Objects.equals(datauploadrestriction, that.datauploadrestriction)
|
||||||
Objects.equals(versioning, that.versioning) &&
|
&& Objects.equals(versioning, that.versioning)
|
||||||
Objects.equals(citationguidelineurl, that.citationguidelineurl) &&
|
&& Objects.equals(citationguidelineurl, that.citationguidelineurl)
|
||||||
Objects.equals(qualitymanagementkind, that.qualitymanagementkind) &&
|
&& Objects.equals(qualitymanagementkind, that.qualitymanagementkind)
|
||||||
Objects.equals(pidsystems, that.pidsystems) &&
|
&& Objects.equals(pidsystems, that.pidsystems)
|
||||||
Objects.equals(certificates, that.certificates) &&
|
&& Objects.equals(certificates, that.certificates)
|
||||||
Objects.equals(policies, that.policies) &&
|
&& Objects.equals(policies, that.policies)
|
||||||
Objects.equals(journal, that.journal);
|
&& Objects.equals(journal, that.journal);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), datasourcetype, openairecompatibility, officialname, englishname, websiteurl, logourl, contactemail, namespaceprefix, latitude, longitude, dateofvalidation, description, subjects, odnumberofitems, odnumberofitemsdate, odpolicies, odlanguages, odcontenttypes, accessinfopackage, releasestartdate, releaseenddate, missionstatementurl, dataprovider, serviceprovider, databaseaccesstype, datauploadtype, databaseaccessrestriction, datauploadrestriction, versioning, citationguidelineurl, qualitymanagementkind, pidsystems, certificates, policies, journal);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
datasourcetype,
|
||||||
|
openairecompatibility,
|
||||||
|
officialname,
|
||||||
|
englishname,
|
||||||
|
websiteurl,
|
||||||
|
logourl,
|
||||||
|
contactemail,
|
||||||
|
namespaceprefix,
|
||||||
|
latitude,
|
||||||
|
longitude,
|
||||||
|
dateofvalidation,
|
||||||
|
description,
|
||||||
|
subjects,
|
||||||
|
odnumberofitems,
|
||||||
|
odnumberofitemsdate,
|
||||||
|
odpolicies,
|
||||||
|
odlanguages,
|
||||||
|
odcontenttypes,
|
||||||
|
accessinfopackage,
|
||||||
|
releasestartdate,
|
||||||
|
releaseenddate,
|
||||||
|
missionstatementurl,
|
||||||
|
dataprovider,
|
||||||
|
serviceprovider,
|
||||||
|
databaseaccesstype,
|
||||||
|
datauploadtype,
|
||||||
|
databaseaccessrestriction,
|
||||||
|
datauploadrestriction,
|
||||||
|
versioning,
|
||||||
|
citationguidelineurl,
|
||||||
|
qualitymanagementkind,
|
||||||
|
pidsystems,
|
||||||
|
certificates,
|
||||||
|
policies,
|
||||||
|
journal);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -97,18 +97,19 @@ public class ExternalReference implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
ExternalReference that = (ExternalReference) o;
|
ExternalReference that = (ExternalReference) o;
|
||||||
return Objects.equals(sitename, that.sitename) &&
|
return Objects.equals(sitename, that.sitename)
|
||||||
Objects.equals(label, that.label) &&
|
&& Objects.equals(label, that.label)
|
||||||
Objects.equals(url, that.url) &&
|
&& Objects.equals(url, that.url)
|
||||||
Objects.equals(description, that.description) &&
|
&& Objects.equals(description, that.description)
|
||||||
Objects.equals(qualifier, that.qualifier) &&
|
&& Objects.equals(qualifier, that.qualifier)
|
||||||
Objects.equals(refidentifier, that.refidentifier) &&
|
&& Objects.equals(refidentifier, that.refidentifier)
|
||||||
Objects.equals(query, that.query) &&
|
&& Objects.equals(query, that.query)
|
||||||
Objects.equals(dataInfo, that.dataInfo);
|
&& Objects.equals(dataInfo, that.dataInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
|
return Objects.hash(
|
||||||
|
sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,11 +60,11 @@ public class ExtraInfo implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
ExtraInfo extraInfo = (ExtraInfo) o;
|
ExtraInfo extraInfo = (ExtraInfo) o;
|
||||||
return Objects.equals(name, extraInfo.name) &&
|
return Objects.equals(name, extraInfo.name)
|
||||||
Objects.equals(typology, extraInfo.typology) &&
|
&& Objects.equals(typology, extraInfo.typology)
|
||||||
Objects.equals(provenance, extraInfo.provenance) &&
|
&& Objects.equals(provenance, extraInfo.provenance)
|
||||||
Objects.equals(trust, extraInfo.trust) &&
|
&& Objects.equals(trust, extraInfo.trust)
|
||||||
Objects.equals(value, extraInfo.value);
|
&& Objects.equals(value, extraInfo.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -31,12 +31,9 @@ public class Field<T> implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
Field<T> other = (Field<T>) obj;
|
Field<T> other = (Field<T>) obj;
|
||||||
return getValue().equals(other.getValue());
|
return getValue().equals(other.getValue());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class GeoLocation implements Serializable {
|
public class GeoLocation implements Serializable {
|
||||||
|
|
||||||
|
@ -39,13 +38,17 @@ public class GeoLocation implements Serializable {
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public boolean isBlank() {
|
public boolean isBlank() {
|
||||||
return StringUtils.isBlank(point) &&
|
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
|
||||||
StringUtils.isBlank(box) &&
|
|
||||||
StringUtils.isBlank(place);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toComparableString() {
|
public String toComparableString() {
|
||||||
return isBlank()?"":String.format("%s::%s%s", point != null ? point.toLowerCase() : "", box != null ? box.toLowerCase() : "", place != null ? place.toLowerCase() : "");
|
return isBlank()
|
||||||
|
? ""
|
||||||
|
: String.format(
|
||||||
|
"%s::%s%s",
|
||||||
|
point != null ? point.toLowerCase() : "",
|
||||||
|
box != null ? box.toLowerCase() : "",
|
||||||
|
place != null ? place.toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -55,16 +58,12 @@ public class GeoLocation implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
GeoLocation other = (GeoLocation) obj;
|
GeoLocation other = (GeoLocation) obj;
|
||||||
|
|
||||||
return toComparableString()
|
return toComparableString().equals(other.toComparableString());
|
||||||
.equals(other.toComparableString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,10 +22,12 @@ public class Instance implements Serializable {
|
||||||
|
|
||||||
private Field<String> dateofacceptance;
|
private Field<String> dateofacceptance;
|
||||||
|
|
||||||
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed results
|
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed
|
||||||
|
// results
|
||||||
private Field<String> processingchargeamount;
|
private Field<String> processingchargeamount;
|
||||||
|
|
||||||
// currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly typed results
|
// currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly
|
||||||
|
// typed results
|
||||||
private Field<String> processingchargecurrency;
|
private Field<String> processingchargecurrency;
|
||||||
|
|
||||||
private Field<String> refereed; // peer-review status
|
private Field<String> refereed; // peer-review status
|
||||||
|
@ -119,10 +121,17 @@ public class Instance implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toComparableString() {
|
public String toComparableString() {
|
||||||
return String.format("%s::%s::%s::%s",
|
return String.format(
|
||||||
hostedby != null && hostedby.getKey()!= null ? hostedby.getKey().toLowerCase() : "",
|
"%s::%s::%s::%s",
|
||||||
accessright!= null && accessright.getClassid()!= null ? accessright.getClassid() : "",
|
hostedby != null && hostedby.getKey() != null
|
||||||
instancetype!= null && instancetype.getClassid()!= null ? instancetype.getClassid() : "",
|
? hostedby.getKey().toLowerCase()
|
||||||
|
: "",
|
||||||
|
accessright != null && accessright.getClassid() != null
|
||||||
|
? accessright.getClassid()
|
||||||
|
: "",
|
||||||
|
instancetype != null && instancetype.getClassid() != null
|
||||||
|
? instancetype.getClassid()
|
||||||
|
: "",
|
||||||
url != null ? url : "");
|
url != null ? url : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,16 +142,12 @@ public class Instance implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Instance other = (Instance) obj;
|
Instance other = (Instance) obj;
|
||||||
|
|
||||||
return toComparableString()
|
return toComparableString().equals(other.toComparableString());
|
||||||
.equals(other.toComparableString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -130,22 +130,34 @@ public class Journal implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
Journal journal = (Journal) o;
|
Journal journal = (Journal) o;
|
||||||
return Objects.equals(name, journal.name) &&
|
return Objects.equals(name, journal.name)
|
||||||
Objects.equals(issnPrinted, journal.issnPrinted) &&
|
&& Objects.equals(issnPrinted, journal.issnPrinted)
|
||||||
Objects.equals(issnOnline, journal.issnOnline) &&
|
&& Objects.equals(issnOnline, journal.issnOnline)
|
||||||
Objects.equals(issnLinking, journal.issnLinking) &&
|
&& Objects.equals(issnLinking, journal.issnLinking)
|
||||||
Objects.equals(ep, journal.ep) &&
|
&& Objects.equals(ep, journal.ep)
|
||||||
Objects.equals(iss, journal.iss) &&
|
&& Objects.equals(iss, journal.iss)
|
||||||
Objects.equals(sp, journal.sp) &&
|
&& Objects.equals(sp, journal.sp)
|
||||||
Objects.equals(vol, journal.vol) &&
|
&& Objects.equals(vol, journal.vol)
|
||||||
Objects.equals(edition, journal.edition) &&
|
&& Objects.equals(edition, journal.edition)
|
||||||
Objects.equals(conferenceplace, journal.conferenceplace) &&
|
&& Objects.equals(conferenceplace, journal.conferenceplace)
|
||||||
Objects.equals(conferencedate, journal.conferencedate) &&
|
&& Objects.equals(conferencedate, journal.conferencedate)
|
||||||
Objects.equals(dataInfo, journal.dataInfo);
|
&& Objects.equals(dataInfo, journal.dataInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, conferenceplace, conferencedate, dataInfo);
|
return Objects.hash(
|
||||||
|
name,
|
||||||
|
issnPrinted,
|
||||||
|
issnOnline,
|
||||||
|
issnLinking,
|
||||||
|
ep,
|
||||||
|
iss,
|
||||||
|
sp,
|
||||||
|
vol,
|
||||||
|
edition,
|
||||||
|
conferenceplace,
|
||||||
|
conferencedate,
|
||||||
|
dataInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class KeyValue implements Serializable {
|
public class KeyValue implements Serializable {
|
||||||
|
|
||||||
|
@ -38,7 +37,12 @@ public class KeyValue implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toComparableString() {
|
public String toComparableString() {
|
||||||
return isBlank()?"":String.format("%s::%s", key != null ? key.toLowerCase() : "", value != null ? value.toLowerCase() : "");
|
return isBlank()
|
||||||
|
? ""
|
||||||
|
: String.format(
|
||||||
|
"%s::%s",
|
||||||
|
key != null ? key.toLowerCase() : "",
|
||||||
|
value != null ? value.toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
|
@ -53,12 +57,9 @@ public class KeyValue implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
KeyValue other = (KeyValue) obj;
|
KeyValue other = (KeyValue) obj;
|
||||||
|
|
||||||
|
|
|
@ -25,24 +25,18 @@ public abstract class Oaf implements Serializable {
|
||||||
this.lastupdatetimestamp = lastupdatetimestamp;
|
this.lastupdatetimestamp = lastupdatetimestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void mergeOAFDataInfo(Oaf e) {
|
public void mergeOAFDataInfo(Oaf e) {
|
||||||
if (e.getDataInfo()!= null && compareTrust(this,e)<0)
|
if (e.getDataInfo() != null && compareTrust(this, e) < 0) dataInfo = e.getDataInfo();
|
||||||
dataInfo = e.getDataInfo();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String extractTrust(Oaf e) {
|
protected String extractTrust(Oaf e) {
|
||||||
if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null)
|
if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null)
|
||||||
return "0.0";
|
return "0.0";
|
||||||
return e.getDataInfo().getTrust();
|
return e.getDataInfo().getTrust();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected int compareTrust(Oaf a, Oaf b) {
|
protected int compareTrust(Oaf a, Oaf b) {
|
||||||
return extractTrust(a).compareTo(extractTrust(b));
|
return extractTrust(a).compareTo(extractTrust(b));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -50,8 +44,8 @@ public abstract class Oaf implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
Oaf oaf = (Oaf) o;
|
Oaf oaf = (Oaf) o;
|
||||||
return Objects.equals(dataInfo, oaf.dataInfo) &&
|
return Objects.equals(dataInfo, oaf.dataInfo)
|
||||||
Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
|
&& Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -86,11 +86,9 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
||||||
this.oaiprovenance = oaiprovenance;
|
this.oaiprovenance = oaiprovenance;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void mergeFrom(OafEntity e) {
|
public void mergeFrom(OafEntity e) {
|
||||||
|
|
||||||
if (e == null)
|
if (e == null) return;
|
||||||
return;
|
|
||||||
|
|
||||||
originalId = mergeLists(originalId, e.getOriginalId());
|
originalId = mergeLists(originalId, e.getOriginalId());
|
||||||
|
|
||||||
|
@ -108,12 +106,15 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
||||||
|
|
||||||
if (e.getOaiprovenance() != null && compareTrust(this, e) < 0)
|
if (e.getOaiprovenance() != null && compareTrust(this, e) < 0)
|
||||||
oaiprovenance = e.getOaiprovenance();
|
oaiprovenance = e.getOaiprovenance();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <T> List<T> mergeLists(final List<T>... lists) {
|
protected <T> List<T> mergeLists(final List<T>... lists) {
|
||||||
|
|
||||||
return Arrays.stream(lists).filter(Objects::nonNull).flatMap(List::stream).distinct().collect(Collectors.toList());
|
return Arrays.stream(lists)
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.distinct()
|
||||||
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -122,18 +123,27 @@ public abstract class OafEntity extends Oaf implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
OafEntity oafEntity = (OafEntity) o;
|
OafEntity oafEntity = (OafEntity) o;
|
||||||
return Objects.equals(id, oafEntity.id) &&
|
return Objects.equals(id, oafEntity.id)
|
||||||
Objects.equals(originalId, oafEntity.originalId) &&
|
&& Objects.equals(originalId, oafEntity.originalId)
|
||||||
Objects.equals(collectedfrom, oafEntity.collectedfrom) &&
|
&& Objects.equals(collectedfrom, oafEntity.collectedfrom)
|
||||||
Objects.equals(pid, oafEntity.pid) &&
|
&& Objects.equals(pid, oafEntity.pid)
|
||||||
Objects.equals(dateofcollection, oafEntity.dateofcollection) &&
|
&& Objects.equals(dateofcollection, oafEntity.dateofcollection)
|
||||||
Objects.equals(dateoftransformation, oafEntity.dateoftransformation) &&
|
&& Objects.equals(dateoftransformation, oafEntity.dateoftransformation)
|
||||||
Objects.equals(extraInfo, oafEntity.extraInfo) &&
|
&& Objects.equals(extraInfo, oafEntity.extraInfo)
|
||||||
Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
|
&& Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), id, originalId, collectedfrom, pid, dateofcollection, dateoftransformation, extraInfo, oaiprovenance);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
id,
|
||||||
|
originalId,
|
||||||
|
collectedfrom,
|
||||||
|
pid,
|
||||||
|
dateofcollection,
|
||||||
|
dateoftransformation,
|
||||||
|
extraInfo,
|
||||||
|
oaiprovenance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -122,7 +122,8 @@ public class Organization extends OafEntity implements Serializable {
|
||||||
return ecinternationalorganizationeurinterests;
|
return ecinternationalorganizationeurinterests;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setEcinternationalorganizationeurinterests(Field<String> ecinternationalorganizationeurinterests) {
|
public void setEcinternationalorganizationeurinterests(
|
||||||
|
Field<String> ecinternationalorganizationeurinterests) {
|
||||||
this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests;
|
this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,7 +167,6 @@ public class Organization extends OafEntity implements Serializable {
|
||||||
this.country = country;
|
this.country = country;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void mergeFrom(OafEntity e) {
|
public void mergeFrom(OafEntity e) {
|
||||||
super.mergeFrom(e);
|
super.mergeFrom(e);
|
||||||
|
@ -176,21 +176,60 @@ public class Organization extends OafEntity implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
final Organization o = (Organization) e;
|
final Organization o = (Organization) e;
|
||||||
legalshortname = o.getLegalshortname() != null && compareTrust(this, e)<0? o.getLegalshortname() : legalshortname;
|
legalshortname =
|
||||||
legalname = o.getLegalname() != null && compareTrust(this, e)<0 ? o.getLegalname() : legalname;
|
o.getLegalshortname() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getLegalshortname()
|
||||||
|
: legalshortname;
|
||||||
|
legalname =
|
||||||
|
o.getLegalname() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getLegalname()
|
||||||
|
: legalname;
|
||||||
alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames);
|
alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames);
|
||||||
websiteurl = o.getWebsiteurl() != null && compareTrust(this, e)<0? o.getWebsiteurl() : websiteurl;
|
websiteurl =
|
||||||
|
o.getWebsiteurl() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getWebsiteurl()
|
||||||
|
: websiteurl;
|
||||||
logourl = o.getLogourl() != null && compareTrust(this, e) < 0 ? o.getLogourl() : logourl;
|
logourl = o.getLogourl() != null && compareTrust(this, e) < 0 ? o.getLogourl() : logourl;
|
||||||
eclegalbody = o.getEclegalbody() != null && compareTrust(this, e)<0? o.getEclegalbody() : eclegalbody;
|
eclegalbody =
|
||||||
eclegalperson = o.getEclegalperson() != null && compareTrust(this, e)<0? o.getEclegalperson() : eclegalperson;
|
o.getEclegalbody() != null && compareTrust(this, e) < 0
|
||||||
ecnonprofit = o.getEcnonprofit() != null && compareTrust(this, e)<0? o.getEcnonprofit() : ecnonprofit;
|
? o.getEclegalbody()
|
||||||
ecresearchorganization = o.getEcresearchorganization() != null && compareTrust(this, e)<0? o.getEcresearchorganization() : ecresearchorganization;
|
: eclegalbody;
|
||||||
echighereducation = o.getEchighereducation() != null && compareTrust(this, e)<0? o.getEchighereducation() : echighereducation;
|
eclegalperson =
|
||||||
ecinternationalorganizationeurinterests = o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e)<0? o.getEcinternationalorganizationeurinterests() : ecinternationalorganizationeurinterests;
|
o.getEclegalperson() != null && compareTrust(this, e) < 0
|
||||||
ecinternationalorganization = o.getEcinternationalorganization() != null && compareTrust(this, e)<0? o.getEcinternationalorganization() : ecinternationalorganization;
|
? o.getEclegalperson()
|
||||||
ecenterprise = o.getEcenterprise() != null && compareTrust(this, e)<0? o.getEcenterprise() :ecenterprise;
|
: eclegalperson;
|
||||||
ecsmevalidated = o.getEcsmevalidated() != null && compareTrust(this, e)<0? o.getEcsmevalidated() :ecsmevalidated;
|
ecnonprofit =
|
||||||
ecnutscode = o.getEcnutscode() != null && compareTrust(this, e)<0? o.getEcnutscode() :ecnutscode;
|
o.getEcnonprofit() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcnonprofit()
|
||||||
|
: ecnonprofit;
|
||||||
|
ecresearchorganization =
|
||||||
|
o.getEcresearchorganization() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcresearchorganization()
|
||||||
|
: ecresearchorganization;
|
||||||
|
echighereducation =
|
||||||
|
o.getEchighereducation() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEchighereducation()
|
||||||
|
: echighereducation;
|
||||||
|
ecinternationalorganizationeurinterests =
|
||||||
|
o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcinternationalorganizationeurinterests()
|
||||||
|
: ecinternationalorganizationeurinterests;
|
||||||
|
ecinternationalorganization =
|
||||||
|
o.getEcinternationalorganization() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcinternationalorganization()
|
||||||
|
: ecinternationalorganization;
|
||||||
|
ecenterprise =
|
||||||
|
o.getEcenterprise() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcenterprise()
|
||||||
|
: ecenterprise;
|
||||||
|
ecsmevalidated =
|
||||||
|
o.getEcsmevalidated() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcsmevalidated()
|
||||||
|
: ecsmevalidated;
|
||||||
|
ecnutscode =
|
||||||
|
o.getEcnutscode() != null && compareTrust(this, e) < 0
|
||||||
|
? o.getEcnutscode()
|
||||||
|
: ecnutscode;
|
||||||
country = o.getCountry() != null && compareTrust(this, e) < 0 ? o.getCountry() : country;
|
country = o.getCountry() != null && compareTrust(this, e) < 0 ? o.getCountry() : country;
|
||||||
mergeOAFDataInfo(o);
|
mergeOAFDataInfo(o);
|
||||||
}
|
}
|
||||||
|
@ -201,26 +240,45 @@ public class Organization extends OafEntity implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
Organization that = (Organization) o;
|
Organization that = (Organization) o;
|
||||||
return Objects.equals(legalshortname, that.legalshortname) &&
|
return Objects.equals(legalshortname, that.legalshortname)
|
||||||
Objects.equals(legalname, that.legalname) &&
|
&& Objects.equals(legalname, that.legalname)
|
||||||
Objects.equals(alternativeNames, that.alternativeNames) &&
|
&& Objects.equals(alternativeNames, that.alternativeNames)
|
||||||
Objects.equals(websiteurl, that.websiteurl) &&
|
&& Objects.equals(websiteurl, that.websiteurl)
|
||||||
Objects.equals(logourl, that.logourl) &&
|
&& Objects.equals(logourl, that.logourl)
|
||||||
Objects.equals(eclegalbody, that.eclegalbody) &&
|
&& Objects.equals(eclegalbody, that.eclegalbody)
|
||||||
Objects.equals(eclegalperson, that.eclegalperson) &&
|
&& Objects.equals(eclegalperson, that.eclegalperson)
|
||||||
Objects.equals(ecnonprofit, that.ecnonprofit) &&
|
&& Objects.equals(ecnonprofit, that.ecnonprofit)
|
||||||
Objects.equals(ecresearchorganization, that.ecresearchorganization) &&
|
&& Objects.equals(ecresearchorganization, that.ecresearchorganization)
|
||||||
Objects.equals(echighereducation, that.echighereducation) &&
|
&& Objects.equals(echighereducation, that.echighereducation)
|
||||||
Objects.equals(ecinternationalorganizationeurinterests, that.ecinternationalorganizationeurinterests) &&
|
&& Objects.equals(
|
||||||
Objects.equals(ecinternationalorganization, that.ecinternationalorganization) &&
|
ecinternationalorganizationeurinterests,
|
||||||
Objects.equals(ecenterprise, that.ecenterprise) &&
|
that.ecinternationalorganizationeurinterests)
|
||||||
Objects.equals(ecsmevalidated, that.ecsmevalidated) &&
|
&& Objects.equals(ecinternationalorganization, that.ecinternationalorganization)
|
||||||
Objects.equals(ecnutscode, that.ecnutscode) &&
|
&& Objects.equals(ecenterprise, that.ecenterprise)
|
||||||
Objects.equals(country, that.country);
|
&& Objects.equals(ecsmevalidated, that.ecsmevalidated)
|
||||||
|
&& Objects.equals(ecnutscode, that.ecnutscode)
|
||||||
|
&& Objects.equals(country, that.country);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), legalshortname, legalname, alternativeNames, websiteurl, logourl, eclegalbody, eclegalperson, ecnonprofit, ecresearchorganization, echighereducation, ecinternationalorganizationeurinterests, ecinternationalorganization, ecenterprise, ecsmevalidated, ecnutscode, country);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
legalshortname,
|
||||||
|
legalname,
|
||||||
|
alternativeNames,
|
||||||
|
websiteurl,
|
||||||
|
logourl,
|
||||||
|
eclegalbody,
|
||||||
|
eclegalperson,
|
||||||
|
ecnonprofit,
|
||||||
|
ecresearchorganization,
|
||||||
|
echighereducation,
|
||||||
|
ecinternationalorganizationeurinterests,
|
||||||
|
ecinternationalorganization,
|
||||||
|
ecenterprise,
|
||||||
|
ecsmevalidated,
|
||||||
|
ecnutscode,
|
||||||
|
country);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,16 +70,17 @@ public class OriginDescription implements Serializable {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
OriginDescription that = (OriginDescription) o;
|
OriginDescription that = (OriginDescription) o;
|
||||||
return Objects.equals(harvestDate, that.harvestDate) &&
|
return Objects.equals(harvestDate, that.harvestDate)
|
||||||
Objects.equals(altered, that.altered) &&
|
&& Objects.equals(altered, that.altered)
|
||||||
Objects.equals(baseURL, that.baseURL) &&
|
&& Objects.equals(baseURL, that.baseURL)
|
||||||
Objects.equals(identifier, that.identifier) &&
|
&& Objects.equals(identifier, that.identifier)
|
||||||
Objects.equals(datestamp, that.datestamp) &&
|
&& Objects.equals(datestamp, that.datestamp)
|
||||||
Objects.equals(metadataNamespace, that.metadataNamespace);
|
&& Objects.equals(metadataNamespace, that.metadataNamespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
|
return Objects.hash(
|
||||||
|
harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,9 +58,9 @@ public class OtherResearchProduct extends Result implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
OtherResearchProduct that = (OtherResearchProduct) o;
|
OtherResearchProduct that = (OtherResearchProduct) o;
|
||||||
return Objects.equals(contactperson, that.contactperson) &&
|
return Objects.equals(contactperson, that.contactperson)
|
||||||
Objects.equals(contactgroup, that.contactgroup) &&
|
&& Objects.equals(contactgroup, that.contactgroup)
|
||||||
Objects.equals(tool, that.tool);
|
&& Objects.equals(tool, that.tool);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -266,7 +266,6 @@ public class Project extends OafEntity implements Serializable {
|
||||||
this.fundedamount = fundedamount;
|
this.fundedamount = fundedamount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void mergeFrom(OafEntity e) {
|
public void mergeFrom(OafEntity e) {
|
||||||
super.mergeFrom(e);
|
super.mergeFrom(e);
|
||||||
|
@ -277,32 +276,80 @@ public class Project extends OafEntity implements Serializable {
|
||||||
|
|
||||||
Project p = (Project) e;
|
Project p = (Project) e;
|
||||||
|
|
||||||
websiteurl= p.getWebsiteurl()!= null && compareTrust(this,e)<0?p.getWebsiteurl():websiteurl;
|
websiteurl =
|
||||||
|
p.getWebsiteurl() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getWebsiteurl()
|
||||||
|
: websiteurl;
|
||||||
code = p.getCode() != null && compareTrust(this, e) < 0 ? p.getCode() : code;
|
code = p.getCode() != null && compareTrust(this, e) < 0 ? p.getCode() : code;
|
||||||
acronym = p.getAcronym() != null && compareTrust(this, e) < 0 ? p.getAcronym() : acronym;
|
acronym = p.getAcronym() != null && compareTrust(this, e) < 0 ? p.getAcronym() : acronym;
|
||||||
title = p.getTitle() != null && compareTrust(this, e) < 0 ? p.getTitle() : title;
|
title = p.getTitle() != null && compareTrust(this, e) < 0 ? p.getTitle() : title;
|
||||||
startdate= p.getStartdate()!=null && compareTrust(this,e)<0?p.getStartdate():startdate;
|
startdate =
|
||||||
|
p.getStartdate() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getStartdate()
|
||||||
|
: startdate;
|
||||||
enddate = p.getEnddate() != null && compareTrust(this, e) < 0 ? p.getEnddate() : enddate;
|
enddate = p.getEnddate() != null && compareTrust(this, e) < 0 ? p.getEnddate() : enddate;
|
||||||
callidentifier= p.getCallidentifier()!=null && compareTrust(this,e)<0?p.getCallidentifier():callidentifier;
|
callidentifier =
|
||||||
keywords= p.getKeywords()!=null && compareTrust(this,e)<0?p.getKeywords():keywords;
|
p.getCallidentifier() != null && compareTrust(this, e) < 0
|
||||||
duration= p.getDuration()!=null && compareTrust(this,e)<0?p.getDuration():duration;
|
? p.getCallidentifier()
|
||||||
|
: callidentifier;
|
||||||
|
keywords =
|
||||||
|
p.getKeywords() != null && compareTrust(this, e) < 0 ? p.getKeywords() : keywords;
|
||||||
|
duration =
|
||||||
|
p.getDuration() != null && compareTrust(this, e) < 0 ? p.getDuration() : duration;
|
||||||
ecsc39 = p.getEcsc39() != null && compareTrust(this, e) < 0 ? p.getEcsc39() : ecsc39;
|
ecsc39 = p.getEcsc39() != null && compareTrust(this, e) < 0 ? p.getEcsc39() : ecsc39;
|
||||||
oamandatepublications= p.getOamandatepublications()!=null && compareTrust(this,e)<0?p.getOamandatepublications():oamandatepublications;
|
oamandatepublications =
|
||||||
ecarticle29_3= p.getEcarticle29_3()!=null && compareTrust(this,e)<0?p.getEcarticle29_3():ecarticle29_3;
|
p.getOamandatepublications() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getOamandatepublications()
|
||||||
|
: oamandatepublications;
|
||||||
|
ecarticle29_3 =
|
||||||
|
p.getEcarticle29_3() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getEcarticle29_3()
|
||||||
|
: ecarticle29_3;
|
||||||
subjects = mergeLists(subjects, p.getSubjects());
|
subjects = mergeLists(subjects, p.getSubjects());
|
||||||
fundingtree = mergeLists(fundingtree, p.getFundingtree());
|
fundingtree = mergeLists(fundingtree, p.getFundingtree());
|
||||||
contracttype= p.getContracttype()!=null && compareTrust(this,e)<0?p.getContracttype():contracttype;
|
contracttype =
|
||||||
optional1= p.getOptional1()!=null && compareTrust(this,e)<0?p.getOptional1():optional1;
|
p.getContracttype() != null && compareTrust(this, e) < 0
|
||||||
optional2= p.getOptional2()!=null && compareTrust(this,e)<0?p.getOptional2():optional2;
|
? p.getContracttype()
|
||||||
jsonextrainfo= p.getJsonextrainfo()!=null && compareTrust(this,e)<0?p.getJsonextrainfo():jsonextrainfo;
|
: contracttype;
|
||||||
contactfullname= p.getContactfullname()!=null && compareTrust(this,e)<0?p.getContactfullname():contactfullname;
|
optional1 =
|
||||||
contactfax= p.getContactfax()!=null && compareTrust(this,e)<0?p.getContactfax():contactfax;
|
p.getOptional1() != null && compareTrust(this, e) < 0
|
||||||
contactphone= p.getContactphone()!=null && compareTrust(this,e)<0?p.getContactphone():contactphone;
|
? p.getOptional1()
|
||||||
contactemail= p.getContactemail()!=null && compareTrust(this,e)<0?p.getContactemail():contactemail;
|
: optional1;
|
||||||
|
optional2 =
|
||||||
|
p.getOptional2() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getOptional2()
|
||||||
|
: optional2;
|
||||||
|
jsonextrainfo =
|
||||||
|
p.getJsonextrainfo() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getJsonextrainfo()
|
||||||
|
: jsonextrainfo;
|
||||||
|
contactfullname =
|
||||||
|
p.getContactfullname() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getContactfullname()
|
||||||
|
: contactfullname;
|
||||||
|
contactfax =
|
||||||
|
p.getContactfax() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getContactfax()
|
||||||
|
: contactfax;
|
||||||
|
contactphone =
|
||||||
|
p.getContactphone() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getContactphone()
|
||||||
|
: contactphone;
|
||||||
|
contactemail =
|
||||||
|
p.getContactemail() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getContactemail()
|
||||||
|
: contactemail;
|
||||||
summary = p.getSummary() != null && compareTrust(this, e) < 0 ? p.getSummary() : summary;
|
summary = p.getSummary() != null && compareTrust(this, e) < 0 ? p.getSummary() : summary;
|
||||||
currency= p.getCurrency()!=null && compareTrust(this,e)<0?p.getCurrency():currency;
|
currency =
|
||||||
totalcost= p.getTotalcost()!=null && compareTrust(this,e)<0?p.getTotalcost():totalcost;
|
p.getCurrency() != null && compareTrust(this, e) < 0 ? p.getCurrency() : currency;
|
||||||
fundedamount= p.getFundedamount()!= null && compareTrust(this,e)<0?p.getFundedamount():fundedamount;
|
totalcost =
|
||||||
|
p.getTotalcost() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getTotalcost()
|
||||||
|
: totalcost;
|
||||||
|
fundedamount =
|
||||||
|
p.getFundedamount() != null && compareTrust(this, e) < 0
|
||||||
|
? p.getFundedamount()
|
||||||
|
: fundedamount;
|
||||||
mergeOAFDataInfo(e);
|
mergeOAFDataInfo(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -312,36 +359,63 @@ public class Project extends OafEntity implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
Project project = (Project) o;
|
Project project = (Project) o;
|
||||||
return Objects.equals(websiteurl, project.websiteurl) &&
|
return Objects.equals(websiteurl, project.websiteurl)
|
||||||
Objects.equals(code, project.code) &&
|
&& Objects.equals(code, project.code)
|
||||||
Objects.equals(acronym, project.acronym) &&
|
&& Objects.equals(acronym, project.acronym)
|
||||||
Objects.equals(title, project.title) &&
|
&& Objects.equals(title, project.title)
|
||||||
Objects.equals(startdate, project.startdate) &&
|
&& Objects.equals(startdate, project.startdate)
|
||||||
Objects.equals(enddate, project.enddate) &&
|
&& Objects.equals(enddate, project.enddate)
|
||||||
Objects.equals(callidentifier, project.callidentifier) &&
|
&& Objects.equals(callidentifier, project.callidentifier)
|
||||||
Objects.equals(keywords, project.keywords) &&
|
&& Objects.equals(keywords, project.keywords)
|
||||||
Objects.equals(duration, project.duration) &&
|
&& Objects.equals(duration, project.duration)
|
||||||
Objects.equals(ecsc39, project.ecsc39) &&
|
&& Objects.equals(ecsc39, project.ecsc39)
|
||||||
Objects.equals(oamandatepublications, project.oamandatepublications) &&
|
&& Objects.equals(oamandatepublications, project.oamandatepublications)
|
||||||
Objects.equals(ecarticle29_3, project.ecarticle29_3) &&
|
&& Objects.equals(ecarticle29_3, project.ecarticle29_3)
|
||||||
Objects.equals(subjects, project.subjects) &&
|
&& Objects.equals(subjects, project.subjects)
|
||||||
Objects.equals(fundingtree, project.fundingtree) &&
|
&& Objects.equals(fundingtree, project.fundingtree)
|
||||||
Objects.equals(contracttype, project.contracttype) &&
|
&& Objects.equals(contracttype, project.contracttype)
|
||||||
Objects.equals(optional1, project.optional1) &&
|
&& Objects.equals(optional1, project.optional1)
|
||||||
Objects.equals(optional2, project.optional2) &&
|
&& Objects.equals(optional2, project.optional2)
|
||||||
Objects.equals(jsonextrainfo, project.jsonextrainfo) &&
|
&& Objects.equals(jsonextrainfo, project.jsonextrainfo)
|
||||||
Objects.equals(contactfullname, project.contactfullname) &&
|
&& Objects.equals(contactfullname, project.contactfullname)
|
||||||
Objects.equals(contactfax, project.contactfax) &&
|
&& Objects.equals(contactfax, project.contactfax)
|
||||||
Objects.equals(contactphone, project.contactphone) &&
|
&& Objects.equals(contactphone, project.contactphone)
|
||||||
Objects.equals(contactemail, project.contactemail) &&
|
&& Objects.equals(contactemail, project.contactemail)
|
||||||
Objects.equals(summary, project.summary) &&
|
&& Objects.equals(summary, project.summary)
|
||||||
Objects.equals(currency, project.currency) &&
|
&& Objects.equals(currency, project.currency)
|
||||||
Objects.equals(totalcost, project.totalcost) &&
|
&& Objects.equals(totalcost, project.totalcost)
|
||||||
Objects.equals(fundedamount, project.fundedamount);
|
&& Objects.equals(fundedamount, project.fundedamount);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), websiteurl, code, acronym, title, startdate, enddate, callidentifier, keywords, duration, ecsc39, oamandatepublications, ecarticle29_3, subjects, fundingtree, contracttype, optional1, optional2, jsonextrainfo, contactfullname, contactfax, contactphone, contactemail, summary, currency, totalcost, fundedamount);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
websiteurl,
|
||||||
|
code,
|
||||||
|
acronym,
|
||||||
|
title,
|
||||||
|
startdate,
|
||||||
|
enddate,
|
||||||
|
callidentifier,
|
||||||
|
keywords,
|
||||||
|
duration,
|
||||||
|
ecsc39,
|
||||||
|
oamandatepublications,
|
||||||
|
ecarticle29_3,
|
||||||
|
subjects,
|
||||||
|
fundingtree,
|
||||||
|
contracttype,
|
||||||
|
optional1,
|
||||||
|
optional2,
|
||||||
|
jsonextrainfo,
|
||||||
|
contactfullname,
|
||||||
|
contactfax,
|
||||||
|
contactphone,
|
||||||
|
contactemail,
|
||||||
|
summary,
|
||||||
|
currency,
|
||||||
|
totalcost,
|
||||||
|
fundedamount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,8 +26,7 @@ public class Publication extends Result implements Serializable {
|
||||||
|
|
||||||
Publication p = (Publication) e;
|
Publication p = (Publication) e;
|
||||||
|
|
||||||
if (p.getJournal() != null && compareTrust(this, e)<0)
|
if (p.getJournal() != null && compareTrust(this, e) < 0) journal = p.getJournal();
|
||||||
journal = p.getJournal();
|
|
||||||
mergeOAFDataInfo(e);
|
mergeOAFDataInfo(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class Qualifier implements Serializable {
|
public class Qualifier implements Serializable {
|
||||||
|
|
||||||
|
@ -45,7 +44,10 @@ public class Qualifier implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toComparableString() {
|
public String toComparableString() {
|
||||||
return isBlank()?"": String.format("%s::%s::%s::%s",
|
return isBlank()
|
||||||
|
? ""
|
||||||
|
: String.format(
|
||||||
|
"%s::%s::%s::%s",
|
||||||
classid != null ? classid : "",
|
classid != null ? classid : "",
|
||||||
classname != null ? classname : "",
|
classname != null ? classname : "",
|
||||||
schemeid != null ? schemeid : "",
|
schemeid != null ? schemeid : "",
|
||||||
|
@ -54,11 +56,12 @@ public class Qualifier implements Serializable {
|
||||||
|
|
||||||
@JsonIgnore
|
@JsonIgnore
|
||||||
public boolean isBlank() {
|
public boolean isBlank() {
|
||||||
return StringUtils.isBlank(classid) &&
|
return StringUtils.isBlank(classid)
|
||||||
StringUtils.isBlank(classname) &&
|
&& StringUtils.isBlank(classname)
|
||||||
StringUtils.isBlank(schemeid) &&
|
&& StringUtils.isBlank(schemeid)
|
||||||
StringUtils.isBlank(schemename);
|
&& StringUtils.isBlank(schemename);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return toComparableString().hashCode();
|
return toComparableString().hashCode();
|
||||||
|
@ -66,16 +69,12 @@ public class Qualifier implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
Qualifier other = (Qualifier) obj;
|
Qualifier other = (Qualifier) obj;
|
||||||
|
|
||||||
return toComparableString()
|
return toComparableString().equals(other.toComparableString());
|
||||||
.equals(other.toComparableString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkArgument;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import static com.google.common.base.Preconditions.checkArgument;
|
|
||||||
|
|
||||||
public class Relation extends Oaf {
|
public class Relation extends Oaf {
|
||||||
|
|
||||||
private String relType;
|
private String relType;
|
||||||
|
@ -73,13 +73,18 @@ public class Relation extends Oaf {
|
||||||
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
|
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
|
||||||
checkArgument(Objects.equals(getTarget(), r.getTarget()), "target ids must be equal");
|
checkArgument(Objects.equals(getTarget(), r.getTarget()), "target ids must be equal");
|
||||||
checkArgument(Objects.equals(getRelType(), r.getRelType()), "relType(s) must be equal");
|
checkArgument(Objects.equals(getRelType(), r.getRelType()), "relType(s) must be equal");
|
||||||
checkArgument(Objects.equals(getSubRelType(), r.getSubRelType()),"subRelType(s) must be equal");
|
checkArgument(
|
||||||
|
Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
|
||||||
checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");
|
checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");
|
||||||
|
|
||||||
setCollectedFrom(
|
setCollectedFrom(
|
||||||
Stream
|
Stream.concat(
|
||||||
.concat(Optional.ofNullable(getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()),
|
Optional.ofNullable(getCollectedFrom())
|
||||||
Optional.ofNullable(r.getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()))
|
.map(Collection::stream)
|
||||||
|
.orElse(Stream.empty()),
|
||||||
|
Optional.ofNullable(r.getCollectedFrom())
|
||||||
|
.map(Collection::stream)
|
||||||
|
.orElse(Stream.empty()))
|
||||||
.distinct() // relies on KeyValue.equals
|
.distinct() // relies on KeyValue.equals
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
|
@ -89,16 +94,15 @@ public class Relation extends Oaf {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
Relation relation = (Relation) o;
|
Relation relation = (Relation) o;
|
||||||
return relType.equals(relation.relType) &&
|
return relType.equals(relation.relType)
|
||||||
subRelType.equals(relation.subRelType) &&
|
&& subRelType.equals(relation.subRelType)
|
||||||
relClass.equals(relation.relClass) &&
|
&& relClass.equals(relation.relClass)
|
||||||
source.equals(relation.source) &&
|
&& source.equals(relation.source)
|
||||||
target.equals(relation.target);
|
&& target.equals(relation.target);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom);
|
return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -231,11 +231,9 @@ public class Result extends OafEntity implements Serializable {
|
||||||
|
|
||||||
instance = mergeLists(instance, r.getInstance());
|
instance = mergeLists(instance, r.getInstance());
|
||||||
|
|
||||||
if (r.getResulttype() != null && compareTrust(this, r) < 0)
|
if (r.getResulttype() != null && compareTrust(this, r) < 0) resulttype = r.getResulttype();
|
||||||
resulttype = r.getResulttype();
|
|
||||||
|
|
||||||
if (r.getLanguage() != null && compareTrust(this, r) < 0)
|
if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage();
|
||||||
language = r.getLanguage();
|
|
||||||
|
|
||||||
country = mergeLists(country, r.getCountry());
|
country = mergeLists(country, r.getCountry());
|
||||||
|
|
||||||
|
@ -247,8 +245,7 @@ public class Result extends OafEntity implements Serializable {
|
||||||
|
|
||||||
description = longestLists(description, r.getDescription());
|
description = longestLists(description, r.getDescription());
|
||||||
|
|
||||||
if (r.getPublisher() != null && compareTrust(this, r) < 0)
|
if (r.getPublisher() != null && compareTrust(this, r) < 0) publisher = r.getPublisher();
|
||||||
publisher = r.getPublisher();
|
|
||||||
|
|
||||||
if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0)
|
if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0)
|
||||||
embargoenddate = r.getEmbargoenddate();
|
embargoenddate = r.getEmbargoenddate();
|
||||||
|
@ -261,8 +258,7 @@ public class Result extends OafEntity implements Serializable {
|
||||||
|
|
||||||
contributor = mergeLists(contributor, r.getContributor());
|
contributor = mergeLists(contributor, r.getContributor());
|
||||||
|
|
||||||
if (r.getResourcetype() != null)
|
if (r.getResourcetype() != null) resourcetype = r.getResourcetype();
|
||||||
resourcetype = r.getResourcetype();
|
|
||||||
|
|
||||||
coverage = mergeLists(coverage, r.getCoverage());
|
coverage = mergeLists(coverage, r.getCoverage());
|
||||||
|
|
||||||
|
@ -271,13 +267,21 @@ public class Result extends OafEntity implements Serializable {
|
||||||
externalReference = mergeLists(externalReference, r.getExternalReference());
|
externalReference = mergeLists(externalReference, r.getExternalReference());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
|
private List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
|
||||||
if (a == null || b == null)
|
if (a == null || b == null) return a == null ? b : a;
|
||||||
return a == null ? b : a;
|
|
||||||
if (a.size() == b.size()) {
|
if (a.size() == b.size()) {
|
||||||
int msa = a.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0);
|
int msa =
|
||||||
int msb = b.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0);
|
a.stream()
|
||||||
|
.filter(i -> i.getValue() != null)
|
||||||
|
.map(i -> i.getValue().length())
|
||||||
|
.max(Comparator.naturalOrder())
|
||||||
|
.orElse(0);
|
||||||
|
int msb =
|
||||||
|
b.stream()
|
||||||
|
.filter(i -> i.getValue() != null)
|
||||||
|
.map(i -> i.getValue().length())
|
||||||
|
.max(Comparator.naturalOrder())
|
||||||
|
.orElse(0);
|
||||||
return msa > msb ? a : b;
|
return msa > msb ? a : b;
|
||||||
}
|
}
|
||||||
return a.size() > b.size() ? a : b;
|
return a.size() > b.size() ? a : b;
|
||||||
|
@ -289,31 +293,53 @@ public class Result extends OafEntity implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
Result result = (Result) o;
|
Result result = (Result) o;
|
||||||
return Objects.equals(author, result.author) &&
|
return Objects.equals(author, result.author)
|
||||||
Objects.equals(resulttype, result.resulttype) &&
|
&& Objects.equals(resulttype, result.resulttype)
|
||||||
Objects.equals(language, result.language) &&
|
&& Objects.equals(language, result.language)
|
||||||
Objects.equals(country, result.country) &&
|
&& Objects.equals(country, result.country)
|
||||||
Objects.equals(subject, result.subject) &&
|
&& Objects.equals(subject, result.subject)
|
||||||
Objects.equals(title, result.title) &&
|
&& Objects.equals(title, result.title)
|
||||||
Objects.equals(relevantdate, result.relevantdate) &&
|
&& Objects.equals(relevantdate, result.relevantdate)
|
||||||
Objects.equals(description, result.description) &&
|
&& Objects.equals(description, result.description)
|
||||||
Objects.equals(dateofacceptance, result.dateofacceptance) &&
|
&& Objects.equals(dateofacceptance, result.dateofacceptance)
|
||||||
Objects.equals(publisher, result.publisher) &&
|
&& Objects.equals(publisher, result.publisher)
|
||||||
Objects.equals(embargoenddate, result.embargoenddate) &&
|
&& Objects.equals(embargoenddate, result.embargoenddate)
|
||||||
Objects.equals(source, result.source) &&
|
&& Objects.equals(source, result.source)
|
||||||
Objects.equals(fulltext, result.fulltext) &&
|
&& Objects.equals(fulltext, result.fulltext)
|
||||||
Objects.equals(format, result.format) &&
|
&& Objects.equals(format, result.format)
|
||||||
Objects.equals(contributor, result.contributor) &&
|
&& Objects.equals(contributor, result.contributor)
|
||||||
Objects.equals(resourcetype, result.resourcetype) &&
|
&& Objects.equals(resourcetype, result.resourcetype)
|
||||||
Objects.equals(coverage, result.coverage) &&
|
&& Objects.equals(coverage, result.coverage)
|
||||||
Objects.equals(bestaccessright, result.bestaccessright) &&
|
&& Objects.equals(bestaccessright, result.bestaccessright)
|
||||||
Objects.equals(context, result.context) &&
|
&& Objects.equals(context, result.context)
|
||||||
Objects.equals(externalReference, result.externalReference) &&
|
&& Objects.equals(externalReference, result.externalReference)
|
||||||
Objects.equals(instance, result.instance);
|
&& Objects.equals(instance, result.instance);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), author, resulttype, language, country, subject, title, relevantdate, description, dateofacceptance, publisher, embargoenddate, source, fulltext, format, contributor, resourcetype, coverage, bestaccessright, context, externalReference, instance);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
author,
|
||||||
|
resulttype,
|
||||||
|
language,
|
||||||
|
country,
|
||||||
|
subject,
|
||||||
|
title,
|
||||||
|
relevantdate,
|
||||||
|
description,
|
||||||
|
dateofacceptance,
|
||||||
|
publisher,
|
||||||
|
embargoenddate,
|
||||||
|
source,
|
||||||
|
fulltext,
|
||||||
|
format,
|
||||||
|
contributor,
|
||||||
|
resourcetype,
|
||||||
|
coverage,
|
||||||
|
bestaccessright,
|
||||||
|
context,
|
||||||
|
externalReference,
|
||||||
|
instance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,9 +59,15 @@ public class Software extends Result implements Serializable {
|
||||||
|
|
||||||
license = mergeLists(license, s.getLicense());
|
license = mergeLists(license, s.getLicense());
|
||||||
|
|
||||||
codeRepositoryUrl = s.getCodeRepositoryUrl()!= null && compareTrust(this, s)<0?s.getCodeRepositoryUrl():codeRepositoryUrl;
|
codeRepositoryUrl =
|
||||||
|
s.getCodeRepositoryUrl() != null && compareTrust(this, s) < 0
|
||||||
|
? s.getCodeRepositoryUrl()
|
||||||
|
: codeRepositoryUrl;
|
||||||
|
|
||||||
programmingLanguage= s.getProgrammingLanguage()!= null && compareTrust(this, s)<0?s.getProgrammingLanguage():programmingLanguage;
|
programmingLanguage =
|
||||||
|
s.getProgrammingLanguage() != null && compareTrust(this, s) < 0
|
||||||
|
? s.getProgrammingLanguage()
|
||||||
|
: programmingLanguage;
|
||||||
|
|
||||||
mergeOAFDataInfo(e);
|
mergeOAFDataInfo(e);
|
||||||
}
|
}
|
||||||
|
@ -72,14 +78,19 @@ public class Software extends Result implements Serializable {
|
||||||
if (o == null || getClass() != o.getClass()) return false;
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
if (!super.equals(o)) return false;
|
if (!super.equals(o)) return false;
|
||||||
Software software = (Software) o;
|
Software software = (Software) o;
|
||||||
return Objects.equals(documentationUrl, software.documentationUrl) &&
|
return Objects.equals(documentationUrl, software.documentationUrl)
|
||||||
Objects.equals(license, software.license) &&
|
&& Objects.equals(license, software.license)
|
||||||
Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl) &&
|
&& Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl)
|
||||||
Objects.equals(programmingLanguage, software.programmingLanguage);
|
&& Objects.equals(programmingLanguage, software.programmingLanguage);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(super.hashCode(), documentationUrl, license, codeRepositoryUrl, programmingLanguage);
|
return Objects.hash(
|
||||||
|
super.hashCode(),
|
||||||
|
documentationUrl,
|
||||||
|
license,
|
||||||
|
codeRepositoryUrl,
|
||||||
|
programmingLanguage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,16 +45,12 @@ public class StructuredProperty implements Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (this == obj)
|
if (this == obj) return true;
|
||||||
return true;
|
if (obj == null) return false;
|
||||||
if (obj == null)
|
if (getClass() != obj.getClass()) return false;
|
||||||
return false;
|
|
||||||
if (getClass() != obj.getClass())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
StructuredProperty other = (StructuredProperty) obj;
|
StructuredProperty other = (StructuredProperty) obj;
|
||||||
|
|
||||||
return toComparableString()
|
return toComparableString().equals(other.toComparableString());
|
||||||
.equals(other.toComparableString());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,12 +2,11 @@ package eu.dnetlib.dhp.schema.scholexplorer;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class DLIDataset extends Dataset {
|
public class DLIDataset extends Dataset {
|
||||||
|
|
||||||
|
@ -47,17 +46,23 @@ public class DLIDataset extends Dataset {
|
||||||
DLIDataset p = (DLIDataset) e;
|
DLIDataset p = (DLIDataset) e;
|
||||||
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
|
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
|
||||||
completionStatus = p.completionStatus;
|
completionStatus = p.completionStatus;
|
||||||
if ("complete".equalsIgnoreCase(p.completionStatus))
|
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
|
||||||
completionStatus = "complete";
|
|
||||||
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
|
private List<ProvenaceInfo> mergeProvenance(
|
||||||
|
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
|
||||||
Map<String, ProvenaceInfo> result = new HashMap<>();
|
Map<String, ProvenaceInfo> result = new HashMap<>();
|
||||||
if (a != null)
|
if (a != null)
|
||||||
a.forEach(p -> {
|
a.forEach(
|
||||||
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
|
p -> {
|
||||||
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
|
if (p != null
|
||||||
|
&& StringUtils.isNotBlank(p.getId())
|
||||||
|
&& result.containsKey(p.getId())) {
|
||||||
|
if ("incomplete"
|
||||||
|
.equalsIgnoreCase(
|
||||||
|
result.get(p.getId()).getCompletionStatus())
|
||||||
|
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,9 +70,15 @@ public class DLIDataset extends Dataset {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
});
|
});
|
||||||
if (b != null)
|
if (b != null)
|
||||||
b.forEach(p -> {
|
b.forEach(
|
||||||
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
|
p -> {
|
||||||
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
|
if (p != null
|
||||||
|
&& StringUtils.isNotBlank(p.getId())
|
||||||
|
&& result.containsKey(p.getId())) {
|
||||||
|
if ("incomplete"
|
||||||
|
.equalsIgnoreCase(
|
||||||
|
result.get(p.getId()).getCompletionStatus())
|
||||||
|
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,9 +2,9 @@ package eu.dnetlib.dhp.schema.scholexplorer;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class DLIPublication extends Publication implements Serializable {
|
public class DLIPublication extends Publication implements Serializable {
|
||||||
|
|
||||||
|
@ -44,17 +44,23 @@ public class DLIPublication extends Publication implements Serializable {
|
||||||
DLIPublication p = (DLIPublication) e;
|
DLIPublication p = (DLIPublication) e;
|
||||||
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
|
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
|
||||||
completionStatus = p.completionStatus;
|
completionStatus = p.completionStatus;
|
||||||
if ("complete".equalsIgnoreCase(p.completionStatus))
|
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
|
||||||
completionStatus = "complete";
|
|
||||||
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
|
private List<ProvenaceInfo> mergeProvenance(
|
||||||
|
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
|
||||||
Map<String, ProvenaceInfo> result = new HashMap<>();
|
Map<String, ProvenaceInfo> result = new HashMap<>();
|
||||||
if (a != null)
|
if (a != null)
|
||||||
a.forEach(p -> {
|
a.forEach(
|
||||||
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
|
p -> {
|
||||||
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
|
if (p != null
|
||||||
|
&& StringUtils.isNotBlank(p.getId())
|
||||||
|
&& result.containsKey(p.getId())) {
|
||||||
|
if ("incomplete"
|
||||||
|
.equalsIgnoreCase(
|
||||||
|
result.get(p.getId()).getCompletionStatus())
|
||||||
|
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,9 +68,15 @@ public class DLIPublication extends Publication implements Serializable {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
});
|
});
|
||||||
if (b != null)
|
if (b != null)
|
||||||
b.forEach(p -> {
|
b.forEach(
|
||||||
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
|
p -> {
|
||||||
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
|
if (p != null
|
||||||
|
&& StringUtils.isNotBlank(p.getId())
|
||||||
|
&& result.containsKey(p.getId())) {
|
||||||
|
if ("incomplete"
|
||||||
|
.equalsIgnoreCase(
|
||||||
|
result.get(p.getId()).getCompletionStatus())
|
||||||
|
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,13 @@
|
||||||
package eu.dnetlib.dhp.schema.scholexplorer;
|
package eu.dnetlib.dhp.schema.scholexplorer;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class DLIUnknown extends Oaf implements Serializable {
|
public class DLIUnknown extends Oaf implements Serializable {
|
||||||
|
|
||||||
|
@ -49,7 +47,6 @@ public class DLIUnknown extends Oaf implements Serializable {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<StructuredProperty> getPid() {
|
public List<StructuredProperty> getPid() {
|
||||||
return pid;
|
return pid;
|
||||||
}
|
}
|
||||||
|
@ -75,17 +72,23 @@ public class DLIUnknown extends Oaf implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void mergeFrom(DLIUnknown p) {
|
public void mergeFrom(DLIUnknown p) {
|
||||||
if ("complete".equalsIgnoreCase(p.completionStatus))
|
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
|
||||||
completionStatus = "complete";
|
|
||||||
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
|
private List<ProvenaceInfo> mergeProvenance(
|
||||||
|
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
|
||||||
Map<String, ProvenaceInfo> result = new HashMap<>();
|
Map<String, ProvenaceInfo> result = new HashMap<>();
|
||||||
if (a != null)
|
if (a != null)
|
||||||
a.forEach(p -> {
|
a.forEach(
|
||||||
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
|
p -> {
|
||||||
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
|
if (p != null
|
||||||
|
&& StringUtils.isNotBlank(p.getId())
|
||||||
|
&& result.containsKey(p.getId())) {
|
||||||
|
if ("incomplete"
|
||||||
|
.equalsIgnoreCase(
|
||||||
|
result.get(p.getId()).getCompletionStatus())
|
||||||
|
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -93,9 +96,15 @@ public class DLIUnknown extends Oaf implements Serializable {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
});
|
});
|
||||||
if (b != null)
|
if (b != null)
|
||||||
b.forEach(p -> {
|
b.forEach(
|
||||||
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
|
p -> {
|
||||||
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
|
if (p != null
|
||||||
|
&& StringUtils.isNotBlank(p.getId())
|
||||||
|
&& result.containsKey(p.getId())) {
|
||||||
|
if ("incomplete"
|
||||||
|
.equalsIgnoreCase(
|
||||||
|
result.get(p.getId()).getCompletionStatus())
|
||||||
|
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
|
||||||
result.put(p.getId(), p);
|
result.put(p.getId(), p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,14 @@
|
||||||
package eu.dnetlib.dhp.schema.action;
|
package eu.dnetlib.dhp.schema.action;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
/**
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
* @author claudio.atzori
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
*/
|
import java.io.IOException;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
/** @author claudio.atzori */
|
||||||
public class AtomicActionTest {
|
public class AtomicActionTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -36,7 +32,5 @@ public class AtomicActionTest {
|
||||||
|
|
||||||
assertEquals(aa1.getClazz(), aa2.getClazz());
|
assertEquals(aa1.getClazz(), aa2.getClazz());
|
||||||
assertEquals(aa1.getPayload(), aa2.getPayload());
|
assertEquals(aa1.getPayload(), aa2.getPayload());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
package eu.dnetlib.dhp.schema.common;
|
package eu.dnetlib.dhp.schema.common;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import org.junit.jupiter.api.Nested;
|
import org.junit.jupiter.api.Nested;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
|
|
||||||
public class ModelSupportTest {
|
public class ModelSupportTest {
|
||||||
|
|
||||||
@Nested
|
@Nested
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class MergeTest {
|
public class MergeTest {
|
||||||
|
|
||||||
|
@ -44,7 +44,6 @@ public class MergeTest {
|
||||||
|
|
||||||
assertNotNull(a.getCollectedfrom());
|
assertNotNull(a.getCollectedfrom());
|
||||||
assertEquals(3, a.getCollectedfrom().size());
|
assertEquals(3, a.getCollectedfrom().size());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -60,7 +59,6 @@ public class MergeTest {
|
||||||
|
|
||||||
assertNotNull(a.getSubject());
|
assertNotNull(a.getSubject());
|
||||||
assertEquals(3, a.getSubject().size());
|
assertEquals(3, a.getSubject().size());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private KeyValue setKV(final String key, final String value) {
|
private KeyValue setKV(final String key, final String value) {
|
||||||
|
@ -73,7 +71,8 @@ public class MergeTest {
|
||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
private StructuredProperty setSP(final String value, final String schema, final String classname) {
|
private StructuredProperty setSP(
|
||||||
|
final String value, final String schema, final String classname) {
|
||||||
StructuredProperty s = new StructuredProperty();
|
StructuredProperty s = new StructuredProperty();
|
||||||
s.setValue(value);
|
s.setValue(value);
|
||||||
Qualifier q = new Qualifier();
|
Qualifier q = new Qualifier();
|
||||||
|
|
|
@ -6,15 +6,13 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.fasterxml.jackson.databind.SerializationFeature;
|
import com.fasterxml.jackson.databind.SerializationFeature;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class DLItest {
|
public class DLItest {
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMergePublication() throws JsonProcessingException {
|
public void testMergePublication() throws JsonProcessingException {
|
||||||
DLIPublication a1 = new DLIPublication();
|
DLIPublication a1 = new DLIPublication();
|
||||||
|
@ -24,30 +22,28 @@ public class DLItest {
|
||||||
a1.setCompletionStatus("complete");
|
a1.setCompletionStatus("complete");
|
||||||
|
|
||||||
DLIPublication a = new DLIPublication();
|
DLIPublication a = new DLIPublication();
|
||||||
a.setPid(Arrays.asList(createSP("10.11","doi","dnet:pid_types"), createSP("123456","pdb","dnet:pid_types")));
|
a.setPid(
|
||||||
|
Arrays.asList(
|
||||||
|
createSP("10.11", "doi", "dnet:pid_types"),
|
||||||
|
createSP("123456", "pdb", "dnet:pid_types")));
|
||||||
a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle")));
|
a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle")));
|
||||||
a.setDlicollectedfrom(Arrays.asList(createCollectedFrom("dct","datacite","complete"),createCollectedFrom("dct","datacite","incomplete")));
|
a.setDlicollectedfrom(
|
||||||
|
Arrays.asList(
|
||||||
|
createCollectedFrom("dct", "datacite", "complete"),
|
||||||
|
createCollectedFrom("dct", "datacite", "incomplete")));
|
||||||
a.setCompletionStatus("incomplete");
|
a.setCompletionStatus("incomplete");
|
||||||
|
|
||||||
a.mergeFrom(a1);
|
a.mergeFrom(a1);
|
||||||
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
System.out.println(mapper.writeValueAsString(a));
|
System.out.println(mapper.writeValueAsString(a));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDeserialization() throws IOException {
|
public void testDeserialization() throws IOException {
|
||||||
|
|
||||||
final String json ="{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. 
Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. 
Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
|
final String json =
|
||||||
|
"{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. 
Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. 
Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
|
||||||
|
|
||||||
ObjectMapper mapper = new ObjectMapper();
|
ObjectMapper mapper = new ObjectMapper();
|
||||||
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
@ -56,7 +52,8 @@ public class DLItest {
|
||||||
System.out.println(mapper.writeValueAsString(dliDataset));
|
System.out.println(mapper.writeValueAsString(dliDataset));
|
||||||
}
|
}
|
||||||
|
|
||||||
private ProvenaceInfo createCollectedFrom(final String id, final String name, final String completionStatus) {
|
private ProvenaceInfo createCollectedFrom(
|
||||||
|
final String id, final String name, final String completionStatus) {
|
||||||
ProvenaceInfo p = new ProvenaceInfo();
|
ProvenaceInfo p = new ProvenaceInfo();
|
||||||
p.setId(id);
|
p.setId(id);
|
||||||
p.setName(name);
|
p.setName(name);
|
||||||
|
@ -64,8 +61,8 @@ public class DLItest {
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private StructuredProperty createSP(
|
||||||
private StructuredProperty createSP(final String value, final String className, final String schemeName) {
|
final String value, final String className, final String schemeName) {
|
||||||
StructuredProperty p = new StructuredProperty();
|
StructuredProperty p = new StructuredProperty();
|
||||||
p.setValue(value);
|
p.setValue(value);
|
||||||
Qualifier schema = new Qualifier();
|
Qualifier schema = new Qualifier();
|
||||||
|
@ -76,6 +73,4 @@ public class DLItest {
|
||||||
p.setQualifier(schema);
|
p.setQualifier(schema);
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,22 +10,22 @@ import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJo
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import org.dom4j.Document;
|
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.io.SAXReader;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import org.dom4j.Document;
|
||||||
|
import org.dom4j.Element;
|
||||||
|
import org.dom4j.io.SAXReader;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class ISClient implements Serializable {
|
public class ISClient implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
private static final Logger log =
|
||||||
|
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
||||||
|
|
||||||
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
||||||
|
|
||||||
|
@ -37,7 +37,9 @@ public class ISClient implements Serializable {
|
||||||
|
|
||||||
public List<String> getLatestRawsetPaths(String setIds) {
|
public List<String> getLatestRawsetPaths(String setIds) {
|
||||||
|
|
||||||
List<String> ids = Lists.newArrayList(Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
|
List<String> ids =
|
||||||
|
Lists.newArrayList(
|
||||||
|
Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
|
||||||
.omitEmptyStrings()
|
.omitEmptyStrings()
|
||||||
.trimResults()
|
.trimResults()
|
||||||
.split(setIds));
|
.split(setIds));
|
||||||
|
@ -50,8 +52,11 @@ public class ISClient implements Serializable {
|
||||||
|
|
||||||
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
|
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
|
||||||
|
|
||||||
final String q = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
|
final String q =
|
||||||
+ "where $x//SET/@id = '" + setId + "' return $x";
|
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
|
||||||
|
+ "where $x//SET/@id = '"
|
||||||
|
+ setId
|
||||||
|
+ "' return $x";
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final String basePath = getBasePathHDFS(isLookup);
|
final String basePath = getBasePathHDFS(isLookup);
|
||||||
|
@ -62,7 +67,8 @@ public class ISClient implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private ActionManagerSet getActionManagerSet(final String basePath, final String profile) throws ActionManagerException {
|
private ActionManagerSet getActionManagerSet(final String basePath, final String profile)
|
||||||
|
throws ActionManagerException {
|
||||||
final SAXReader reader = new SAXReader();
|
final SAXReader reader = new SAXReader();
|
||||||
final ActionManagerSet set = new ActionManagerSet();
|
final ActionManagerSet set = new ActionManagerSet();
|
||||||
|
|
||||||
|
@ -72,13 +78,19 @@ public class ISClient implements Serializable {
|
||||||
set.setId(doc.valueOf("//SET/@id").trim());
|
set.setId(doc.valueOf("//SET/@id").trim());
|
||||||
set.setName(doc.valueOf("//SET").trim());
|
set.setName(doc.valueOf("//SET").trim());
|
||||||
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim()));
|
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim()));
|
||||||
set.setLatest(doc.valueOf("//RAW_SETS/LATEST/@id"), doc.valueOf("//RAW_SETS/LATEST/@creationDate"), doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
|
set.setLatest(
|
||||||
|
doc.valueOf("//RAW_SETS/LATEST/@id"),
|
||||||
|
doc.valueOf("//RAW_SETS/LATEST/@creationDate"),
|
||||||
|
doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
|
||||||
set.setDirectory(doc.valueOf("//SET/@directory"));
|
set.setDirectory(doc.valueOf("//SET/@directory"));
|
||||||
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
|
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
|
||||||
if (expiredNodes != null) {
|
if (expiredNodes != null) {
|
||||||
for (int i = 0; i < expiredNodes.size(); i++) {
|
for (int i = 0; i < expiredNodes.size(); i++) {
|
||||||
Element ex = (Element) expiredNodes.get(i);
|
Element ex = (Element) expiredNodes.get(i);
|
||||||
set.addExpired(ex.attributeValue("id"), ex.attributeValue("creationDate"), ex.attributeValue("lastUpdate"));
|
set.addExpired(
|
||||||
|
ex.attributeValue("id"),
|
||||||
|
ex.attributeValue("creationDate"),
|
||||||
|
ex.attributeValue("lastUpdate"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,9 +112,12 @@ public class ISClient implements Serializable {
|
||||||
return queryServiceProperty(isLookup, "basePath");
|
return queryServiceProperty(isLookup, "basePath");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName) throws ActionManagerException {
|
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
|
||||||
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
throws ActionManagerException {
|
||||||
+ propertyName + "']/@value/string()";
|
final String q =
|
||||||
|
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
||||||
|
+ propertyName
|
||||||
|
+ "']/@value/string()";
|
||||||
log.debug("quering for service property: " + q);
|
log.debug("quering for service property: " + q);
|
||||||
try {
|
try {
|
||||||
final List<String> value = isLookup.quickSearchProfile(q);
|
final List<String> value = isLookup.quickSearchProfile(q);
|
||||||
|
@ -121,6 +136,4 @@ public class ISClient implements Serializable {
|
||||||
throw new ActionManagerException(msg, e);
|
throw new ActionManagerException(msg, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,15 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.partition;
|
package eu.dnetlib.dhp.actionmanager.partition;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static org.apache.spark.sql.functions.*;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.ISClient;
|
import eu.dnetlib.dhp.actionmanager.ISClient;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob;
|
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
|
@ -14,32 +20,27 @@ import org.apache.spark.sql.types.*;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.Arrays;
|
/** Partitions given set of action sets by payload type. */
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
import static org.apache.spark.sql.functions.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Partitions given set of action sets by payload type.
|
|
||||||
*/
|
|
||||||
public class PartitionActionSetsByPayloadTypeJob {
|
public class PartitionActionSetsByPayloadTypeJob {
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
private static final Logger logger =
|
||||||
|
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
||||||
|
|
||||||
private static final StructType KV_SCHEMA = StructType$.MODULE$.apply(
|
private static final StructType KV_SCHEMA =
|
||||||
|
StructType$.MODULE$.apply(
|
||||||
Arrays.asList(
|
Arrays.asList(
|
||||||
StructField$.MODULE$.apply("key", DataTypes.StringType, false, Metadata.empty()),
|
StructField$.MODULE$.apply(
|
||||||
StructField$.MODULE$.apply("value", DataTypes.StringType, false, Metadata.empty())
|
"key", DataTypes.StringType, false, Metadata.empty()),
|
||||||
));
|
StructField$.MODULE$.apply(
|
||||||
|
"value", DataTypes.StringType, false, Metadata.empty())));
|
||||||
|
|
||||||
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
|
private static final StructType ATOMIC_ACTION_SCHEMA =
|
||||||
|
StructType$.MODULE$.apply(
|
||||||
Arrays.asList(
|
Arrays.asList(
|
||||||
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
|
StructField$.MODULE$.apply(
|
||||||
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())
|
"clazz", DataTypes.StringType, false, Metadata.empty()),
|
||||||
));
|
StructField$.MODULE$.apply(
|
||||||
|
"payload", DataTypes.StringType, false, Metadata.empty())));
|
||||||
|
|
||||||
private ISClient isClient;
|
private ISClient isClient;
|
||||||
|
|
||||||
|
@ -47,18 +48,18 @@ public class PartitionActionSetsByPayloadTypeJob {
|
||||||
this.isClient = new ISClient(isLookupUrl);
|
this.isClient = new ISClient(isLookupUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
public PartitionActionSetsByPayloadTypeJob() {
|
public PartitionActionSetsByPayloadTypeJob() {}
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils.toString(
|
String jsonConfiguration =
|
||||||
PromoteActionPayloadForGraphTableJob.class
|
IOUtils.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
|
PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
Boolean isSparkSessionManaged =
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
Optional.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
@ -72,7 +73,8 @@ public class PartitionActionSetsByPayloadTypeJob {
|
||||||
String isLookupUrl = parser.get("isLookupUrl");
|
String isLookupUrl = parser.get("isLookupUrl");
|
||||||
logger.info("isLookupUrl: {}", isLookupUrl);
|
logger.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
new PartitionActionSetsByPayloadTypeJob(isLookupUrl).run(isSparkSessionManaged, inputActionSetIds, outputPath);
|
new PartitionActionSetsByPayloadTypeJob(isLookupUrl)
|
||||||
|
.run(isSparkSessionManaged, inputActionSetIds, outputPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void run(Boolean isSparkSessionManaged, String inputActionSetIds, String outputPath) {
|
protected void run(Boolean isSparkSessionManaged, String inputActionSetIds, String outputPath) {
|
||||||
|
@ -83,36 +85,41 @@ public class PartitionActionSetsByPayloadTypeJob {
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||||
|
|
||||||
runWithSparkSession(conf, isSparkSessionManaged,
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
removeOutputDir(spark, outputPath);
|
removeOutputDir(spark, outputPath);
|
||||||
readAndWriteActionSetsFromPaths(spark, inputActionSetPaths, outputPath);
|
readAndWriteActionSetsFromPaths(spark, inputActionSetPaths, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark,
|
private static void removeOutputDir(SparkSession spark, String path) {
|
||||||
String path) {
|
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void readAndWriteActionSetsFromPaths(SparkSession spark,
|
private static void readAndWriteActionSetsFromPaths(
|
||||||
List<String> inputActionSetPaths,
|
SparkSession spark, List<String> inputActionSetPaths, String outputPath) {
|
||||||
String outputPath) {
|
inputActionSetPaths.stream()
|
||||||
inputActionSetPaths
|
.filter(
|
||||||
.forEach(inputActionSetPath -> {
|
path ->
|
||||||
Dataset<Row> actionDS = readActionSetFromPath(spark, inputActionSetPath);
|
HdfsSupport.exists(
|
||||||
|
path, spark.sparkContext().hadoopConfiguration()))
|
||||||
|
.forEach(
|
||||||
|
inputActionSetPath -> {
|
||||||
|
Dataset<Row> actionDS =
|
||||||
|
readActionSetFromPath(spark, inputActionSetPath);
|
||||||
saveActions(actionDS, outputPath);
|
saveActions(actionDS, outputPath);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<Row> readActionSetFromPath(SparkSession spark,
|
private static Dataset<Row> readActionSetFromPath(SparkSession spark, String path) {
|
||||||
String path) {
|
|
||||||
logger.info("Reading actions from path: {}", path);
|
logger.info("Reading actions from path: {}", path);
|
||||||
|
|
||||||
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<Row> rdd = sc
|
JavaRDD<Row> rdd =
|
||||||
.sequenceFile(path, Text.class, Text.class)
|
sc.sequenceFile(path, Text.class, Text.class)
|
||||||
.map(x -> RowFactory.create(x._1().toString(), x._2().toString()));
|
.map(x -> RowFactory.create(x._1().toString(), x._2().toString()));
|
||||||
|
|
||||||
return spark.createDataFrame(rdd, KV_SCHEMA)
|
return spark.createDataFrame(rdd, KV_SCHEMA)
|
||||||
|
@ -120,14 +127,9 @@ public class PartitionActionSetsByPayloadTypeJob {
|
||||||
.select(expr("atomic_action.*"));
|
.select(expr("atomic_action.*"));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void saveActions(Dataset<Row> actionDS,
|
private static void saveActions(Dataset<Row> actionDS, String path) {
|
||||||
String path) {
|
|
||||||
logger.info("Saving actions to path: {}", path);
|
logger.info("Saving actions to path: {}", path);
|
||||||
actionDS
|
actionDS.write().partitionBy("clazz").mode(SaveMode.Append).parquet(path);
|
||||||
.write()
|
|
||||||
.partitionBy("clazz")
|
|
||||||
.mode(SaveMode.Append)
|
|
||||||
.parquet(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public ISClient getIsClient() {
|
public ISClient getIsClient() {
|
||||||
|
|
|
@ -1,30 +1,27 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
|
|
||||||
import java.util.function.BiFunction;
|
import java.util.function.BiFunction;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
/** OAF model merging support. */
|
||||||
|
|
||||||
/**
|
|
||||||
* OAF model merging support.
|
|
||||||
*/
|
|
||||||
public class MergeAndGet {
|
public class MergeAndGet {
|
||||||
|
|
||||||
private MergeAndGet() {
|
private MergeAndGet() {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Strategy for merging OAF model objects.
|
* Strategy for merging OAF model objects.
|
||||||
* <p>
|
*
|
||||||
* MERGE_FROM_AND_GET: use OAF 'mergeFrom' method
|
* <p>MERGE_FROM_AND_GET: use OAF 'mergeFrom' method SELECT_NEWER_AND_GET: use last update
|
||||||
* SELECT_NEWER_AND_GET: use last update timestamp to return newer instance
|
* timestamp to return newer instance
|
||||||
*/
|
*/
|
||||||
public enum Strategy {
|
public enum Strategy {
|
||||||
MERGE_FROM_AND_GET, SELECT_NEWER_AND_GET
|
MERGE_FROM_AND_GET,
|
||||||
|
SELECT_NEWER_AND_GET
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -35,7 +32,8 @@ public class MergeAndGet {
|
||||||
* @param <A> Action payload type
|
* @param <A> Action payload type
|
||||||
* @return BiFunction to be used to merge OAF objects
|
* @return BiFunction to be used to merge OAF objects
|
||||||
*/
|
*/
|
||||||
public static <G extends Oaf, A extends Oaf> SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
|
public static <G extends Oaf, A extends Oaf>
|
||||||
|
SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
|
||||||
switch (strategy) {
|
switch (strategy) {
|
||||||
case MERGE_FROM_AND_GET:
|
case MERGE_FROM_AND_GET:
|
||||||
return () -> MergeAndGet::mergeFromAndGet;
|
return () -> MergeAndGet::mergeFromAndGet;
|
||||||
|
@ -49,26 +47,36 @@ public class MergeAndGet {
|
||||||
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
|
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
|
||||||
((Relation) x).mergeFrom((Relation) y);
|
((Relation) x).mergeFrom((Relation) y);
|
||||||
return x;
|
return x;
|
||||||
} else if (isSubClass(x, OafEntity.class) && isSubClass(y, OafEntity.class) && isSubClass(x, y)) {
|
} else if (isSubClass(x, OafEntity.class)
|
||||||
|
&& isSubClass(y, OafEntity.class)
|
||||||
|
&& isSubClass(x, y)) {
|
||||||
((OafEntity) x).mergeFrom((OafEntity) y);
|
((OafEntity) x).mergeFrom((OafEntity) y);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
throw new RuntimeException(String.format("MERGE_FROM_AND_GET incompatible types: %s, %s",
|
throw new RuntimeException(
|
||||||
|
String.format(
|
||||||
|
"MERGE_FROM_AND_GET incompatible types: %s, %s",
|
||||||
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
||||||
if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
if (x.getClass().equals(y.getClass())
|
||||||
|
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
||||||
return x;
|
return x;
|
||||||
} else if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
|
} else if (x.getClass().equals(y.getClass())
|
||||||
|
&& x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
|
||||||
return (G) y;
|
return (G) y;
|
||||||
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
||||||
return x;
|
return x;
|
||||||
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
|
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
|
||||||
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
|
throw new RuntimeException(
|
||||||
|
String.format(
|
||||||
|
"SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
|
||||||
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
||||||
}
|
}
|
||||||
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
|
throw new RuntimeException(
|
||||||
|
String.format(
|
||||||
|
"SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
|
||||||
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,14 +1,20 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Function;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
@ -17,31 +23,23 @@ import org.apache.spark.sql.SparkSession;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.Objects;
|
/** Applies a given action payload file to graph table of compatible type. */
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.function.BiFunction;
|
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Applies a given action payload file to graph table of compatible type.
|
|
||||||
*/
|
|
||||||
public class PromoteActionPayloadForGraphTableJob {
|
public class PromoteActionPayloadForGraphTableJob {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
|
private static final Logger logger =
|
||||||
|
LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
String jsonConfiguration = IOUtils.toString(
|
String jsonConfiguration =
|
||||||
PromoteActionPayloadForGraphTableJob.class
|
IOUtils.toString(
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
|
PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
Boolean isSparkSessionManaged =
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
Optional.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
.map(Boolean::valueOf)
|
.map(Boolean::valueOf)
|
||||||
.orElse(Boolean.TRUE);
|
.orElse(Boolean.TRUE);
|
||||||
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
||||||
|
@ -61,11 +59,13 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
String outputGraphTablePath = parser.get("outputGraphTablePath");
|
String outputGraphTablePath = parser.get("outputGraphTablePath");
|
||||||
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
|
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
|
||||||
|
|
||||||
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
MergeAndGet.Strategy strategy =
|
||||||
|
MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||||
logger.info("strategy: {}", strategy);
|
logger.info("strategy: {}", strategy);
|
||||||
|
|
||||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||||
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
Class<? extends Oaf> actionPayloadClazz =
|
||||||
|
(Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
||||||
|
|
||||||
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
||||||
|
|
||||||
|
@ -73,10 +73,13 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
|
||||||
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
|
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
|
||||||
|
|
||||||
runWithSparkSession(conf, isSparkSessionManaged,
|
runWithSparkSession(
|
||||||
|
conf,
|
||||||
|
isSparkSessionManaged,
|
||||||
spark -> {
|
spark -> {
|
||||||
removeOutputDir(spark, outputGraphTablePath);
|
removeOutputDir(spark, outputGraphTablePath);
|
||||||
promoteActionPayloadForGraphTable(spark,
|
promoteActionPayloadForGraphTable(
|
||||||
|
spark,
|
||||||
inputGraphTablePath,
|
inputGraphTablePath,
|
||||||
inputActionPayloadPath,
|
inputActionPayloadPath,
|
||||||
outputGraphTablePath,
|
outputGraphTablePath,
|
||||||
|
@ -86,21 +89,23 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(Class<? extends Oaf> rowClazz,
|
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(
|
||||||
Class<? extends Oaf> actionPayloadClazz) {
|
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
|
||||||
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
||||||
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s",
|
String msg =
|
||||||
|
String.format(
|
||||||
|
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
|
||||||
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
|
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
|
||||||
throw new RuntimeException(msg);
|
throw new RuntimeException(msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark,
|
private static void removeOutputDir(SparkSession spark, String path) {
|
||||||
String path) {
|
|
||||||
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(SparkSession spark,
|
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(
|
||||||
|
SparkSession spark,
|
||||||
String inputGraphTablePath,
|
String inputGraphTablePath,
|
||||||
String inputActionPayloadPath,
|
String inputActionPayloadPath,
|
||||||
String outputGraphTablePath,
|
String outputGraphTablePath,
|
||||||
|
@ -108,22 +113,26 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz) {
|
Class<A> actionPayloadClazz) {
|
||||||
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
||||||
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
Dataset<A> actionPayloadDS =
|
||||||
|
readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
||||||
|
|
||||||
Dataset<G> result = promoteActionPayloadForGraphTable(rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
|
Dataset<G> result =
|
||||||
|
promoteActionPayloadForGraphTable(
|
||||||
|
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
|
||||||
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
||||||
|
|
||||||
saveGraphTable(result, outputGraphTablePath);
|
saveGraphTable(result, outputGraphTablePath);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf> Dataset<G> readGraphTable(SparkSession spark,
|
private static <G extends Oaf> Dataset<G> readGraphTable(
|
||||||
String path,
|
SparkSession spark, String path, Class<G> rowClazz) {
|
||||||
Class<G> rowClazz) {
|
|
||||||
logger.info("Reading graph table from path: {}", path);
|
logger.info("Reading graph table from path: {}", path);
|
||||||
|
|
||||||
return spark.read()
|
return spark.read()
|
||||||
.textFile(path)
|
.textFile(path)
|
||||||
.map((MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz), Encoders.bean(rowClazz));
|
.map(
|
||||||
|
(MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz),
|
||||||
|
Encoders.bean(rowClazz));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
return spark
|
return spark
|
||||||
|
@ -133,33 +142,44 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <A extends Oaf> Dataset<A> readActionPayload(SparkSession spark,
|
private static <A extends Oaf> Dataset<A> readActionPayload(
|
||||||
String path,
|
SparkSession spark, String path, Class<A> actionPayloadClazz) {
|
||||||
Class<A> actionPayloadClazz) {
|
|
||||||
logger.info("Reading action payload from path: {}", path);
|
logger.info("Reading action payload from path: {}", path);
|
||||||
return spark
|
return spark.read()
|
||||||
.read()
|
|
||||||
.parquet(path)
|
.parquet(path)
|
||||||
.map((MapFunction<Row, A>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"),
|
.map(
|
||||||
actionPayloadClazz), Encoders.bean(actionPayloadClazz));
|
(MapFunction<Row, A>)
|
||||||
|
value ->
|
||||||
|
OBJECT_MAPPER.readValue(
|
||||||
|
value.<String>getAs("payload"), actionPayloadClazz),
|
||||||
|
Encoders.bean(actionPayloadClazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(Dataset<G> rowDS,
|
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
|
||||||
|
Dataset<G> rowDS,
|
||||||
Dataset<A> actionPayloadDS,
|
Dataset<A> actionPayloadDS,
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy strategy,
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz) {
|
Class<A> actionPayloadClazz) {
|
||||||
logger.info("Promoting action payload for graph table: payload={}, table={}", actionPayloadClazz.getSimpleName(), rowClazz.getSimpleName());
|
logger.info(
|
||||||
|
"Promoting action payload for graph table: payload={}, table={}",
|
||||||
|
actionPayloadClazz.getSimpleName(),
|
||||||
|
rowClazz.getSimpleName());
|
||||||
|
|
||||||
SerializableSupplier<Function<G, String>> rowIdFn = PromoteActionPayloadForGraphTableJob::idFn;
|
SerializableSupplier<Function<G, String>> rowIdFn =
|
||||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn = PromoteActionPayloadForGraphTableJob::idFn;
|
PromoteActionPayloadForGraphTableJob::idFn;
|
||||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
|
SerializableSupplier<Function<A, String>> actionPayloadIdFn =
|
||||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
|
PromoteActionPayloadForGraphTableJob::idFn;
|
||||||
|
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn =
|
||||||
|
MergeAndGet.functionFor(strategy);
|
||||||
|
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn =
|
||||||
|
MergeAndGet.functionFor(strategy);
|
||||||
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
||||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
|
SerializableSupplier<Function<G, Boolean>> isNotZeroFn =
|
||||||
|
PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
|
||||||
|
|
||||||
Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
|
Dataset<G> joinedAndMerged =
|
||||||
.joinGraphTableWithActionPayloadAndMerge(
|
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
|
||||||
rowDS,
|
rowDS,
|
||||||
actionPayloadDS,
|
actionPayloadDS,
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
|
@ -168,14 +188,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
rowClazz,
|
rowClazz,
|
||||||
actionPayloadClazz);
|
actionPayloadClazz);
|
||||||
|
|
||||||
return PromoteActionPayloadFunctions
|
return PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
|
||||||
.groupGraphTableByIdAndMerge(
|
joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
|
||||||
joinedAndMerged,
|
|
||||||
rowIdFn,
|
|
||||||
mergeRowsAndGetFn,
|
|
||||||
zeroFn,
|
|
||||||
isNotZeroFn,
|
|
||||||
rowClazz);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> Function<T, String> idFn() {
|
private static <T extends Oaf> Function<T, String> idFn() {
|
||||||
|
@ -190,19 +204,49 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
private static <T extends Oaf> String idFnForRelation(T t) {
|
private static <T extends Oaf> String idFnForRelation(T t) {
|
||||||
Relation r = (Relation) t;
|
Relation r = (Relation) t;
|
||||||
return Optional.ofNullable(r.getSource())
|
return Optional.ofNullable(r.getSource())
|
||||||
.map(source -> Optional.ofNullable(r.getTarget())
|
.map(
|
||||||
.map(target -> Optional.ofNullable(r.getRelType())
|
source ->
|
||||||
.map(relType -> Optional.ofNullable(r.getSubRelType())
|
Optional.ofNullable(r.getTarget())
|
||||||
.map(subRelType -> Optional.ofNullable(r.getRelClass())
|
.map(
|
||||||
.map(relClass -> String.join(source, target, relType, subRelType, relClass))
|
target ->
|
||||||
.orElse(String.join(source, target, relType, subRelType))
|
Optional.ofNullable(r.getRelType())
|
||||||
)
|
.map(
|
||||||
.orElse(String.join(source, target, relType))
|
relType ->
|
||||||
)
|
Optional.ofNullable(
|
||||||
.orElse(String.join(source, target))
|
r
|
||||||
)
|
.getSubRelType())
|
||||||
.orElse(source)
|
.map(
|
||||||
)
|
subRelType ->
|
||||||
|
Optional
|
||||||
|
.ofNullable(
|
||||||
|
r
|
||||||
|
.getRelClass())
|
||||||
|
.map(
|
||||||
|
relClass ->
|
||||||
|
String
|
||||||
|
.join(
|
||||||
|
source,
|
||||||
|
target,
|
||||||
|
relType,
|
||||||
|
subRelType,
|
||||||
|
relClass))
|
||||||
|
.orElse(
|
||||||
|
String
|
||||||
|
.join(
|
||||||
|
source,
|
||||||
|
target,
|
||||||
|
relType,
|
||||||
|
subRelType)))
|
||||||
|
.orElse(
|
||||||
|
String
|
||||||
|
.join(
|
||||||
|
source,
|
||||||
|
target,
|
||||||
|
relType)))
|
||||||
|
.orElse(
|
||||||
|
String.join(
|
||||||
|
source, target)))
|
||||||
|
.orElse(source))
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -242,13 +286,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf> void saveGraphTable(Dataset<G> result,
|
private static <G extends Oaf> void saveGraphTable(Dataset<G> result, String path) {
|
||||||
String path) {
|
|
||||||
logger.info("Saving graph table to path: {}", path);
|
logger.info("Saving graph table to path: {}", path);
|
||||||
result
|
result.toJSON().write().option("compression", "gzip").text(path);
|
||||||
.toJSON()
|
|
||||||
.write()
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.text(path);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,13 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Function;
|
||||||
import org.apache.spark.api.java.function.FilterFunction;
|
import org.apache.spark.api.java.function.FilterFunction;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
@ -11,23 +17,14 @@ import org.apache.spark.sql.TypedColumn;
|
||||||
import org.apache.spark.sql.expressions.Aggregator;
|
import org.apache.spark.sql.expressions.Aggregator;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
import java.util.Objects;
|
/** Promote action payload functions. */
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.function.BiFunction;
|
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Promote action payload functions.
|
|
||||||
*/
|
|
||||||
public class PromoteActionPayloadFunctions {
|
public class PromoteActionPayloadFunctions {
|
||||||
|
|
||||||
private PromoteActionPayloadFunctions() {
|
private PromoteActionPayloadFunctions() {}
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Joins dataset representing graph table with dataset representing action payload using supplied functions.
|
* Joins dataset representing graph table with dataset representing action payload using
|
||||||
|
* supplied functions.
|
||||||
*
|
*
|
||||||
* @param rowDS Dataset representing graph table
|
* @param rowDS Dataset representing graph table
|
||||||
* @param actionPayloadDS Dataset representing action payload
|
* @param actionPayloadDS Dataset representing action payload
|
||||||
|
@ -40,7 +37,8 @@ public class PromoteActionPayloadFunctions {
|
||||||
* @param <A> Type of action payload instance
|
* @param <A> Type of action payload instance
|
||||||
* @return Dataset of merged graph table rows and action payload instances
|
* @return Dataset of merged graph table rows and action payload instances
|
||||||
*/
|
*/
|
||||||
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(Dataset<G> rowDS,
|
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(
|
||||||
|
Dataset<G> rowDS,
|
||||||
Dataset<A> actionPayloadDS,
|
Dataset<A> actionPayloadDS,
|
||||||
SerializableSupplier<Function<G, String>> rowIdFn,
|
SerializableSupplier<Function<G, String>> rowIdFn,
|
||||||
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
|
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
|
||||||
|
@ -48,34 +46,58 @@ public class PromoteActionPayloadFunctions {
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz) {
|
Class<A> actionPayloadClazz) {
|
||||||
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
||||||
throw new RuntimeException("action payload type must be the same or be a super type of table row type");
|
throw new RuntimeException(
|
||||||
|
"action payload type must be the same or be a super type of table row type");
|
||||||
}
|
}
|
||||||
|
|
||||||
Dataset<Tuple2<String, G>> rowWithIdDS = mapToTupleWithId(rowDS, rowIdFn, rowClazz);
|
Dataset<Tuple2<String, G>> rowWithIdDS = mapToTupleWithId(rowDS, rowIdFn, rowClazz);
|
||||||
Dataset<Tuple2<String, A>> actionPayloadWithIdDS = mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
|
Dataset<Tuple2<String, A>> actionPayloadWithIdDS =
|
||||||
|
mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
|
||||||
|
|
||||||
return rowWithIdDS
|
return rowWithIdDS
|
||||||
.joinWith(actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), "full_outer")
|
.joinWith(
|
||||||
.map((MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
|
actionPayloadWithIdDS,
|
||||||
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
|
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
|
||||||
Optional<A> actionPayloadOpt = Optional.ofNullable(value._2()).map(Tuple2::_2);
|
"full_outer")
|
||||||
return rowOpt
|
.map(
|
||||||
.map(row -> actionPayloadOpt
|
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>)
|
||||||
.map(actionPayload -> mergeAndGetFn.get().apply(row, actionPayload))
|
value -> {
|
||||||
|
Optional<G> rowOpt =
|
||||||
|
Optional.ofNullable(value._1()).map(Tuple2::_2);
|
||||||
|
Optional<A> actionPayloadOpt =
|
||||||
|
Optional.ofNullable(value._2()).map(Tuple2::_2);
|
||||||
|
return rowOpt.map(
|
||||||
|
row ->
|
||||||
|
actionPayloadOpt
|
||||||
|
.map(
|
||||||
|
actionPayload ->
|
||||||
|
mergeAndGetFn
|
||||||
|
.get()
|
||||||
|
.apply(
|
||||||
|
row,
|
||||||
|
actionPayload))
|
||||||
.orElse(row))
|
.orElse(row))
|
||||||
.orElseGet(() -> actionPayloadOpt
|
.orElseGet(
|
||||||
.filter(actionPayload -> actionPayload.getClass().equals(rowClazz))
|
() ->
|
||||||
|
actionPayloadOpt
|
||||||
|
.filter(
|
||||||
|
actionPayload ->
|
||||||
|
actionPayload
|
||||||
|
.getClass()
|
||||||
|
.equals(
|
||||||
|
rowClazz))
|
||||||
.map(rowClazz::cast)
|
.map(rowClazz::cast)
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
}, Encoders.kryo(rowClazz))
|
},
|
||||||
|
Encoders.kryo(rowClazz))
|
||||||
.filter((FilterFunction<G>) Objects::nonNull);
|
.filter((FilterFunction<G>) Objects::nonNull);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(Dataset<T> ds,
|
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(
|
||||||
SerializableSupplier<Function<T, String>> idFn,
|
Dataset<T> ds, SerializableSupplier<Function<T, String>> idFn, Class<T> clazz) {
|
||||||
Class<T> clazz) {
|
return ds.map(
|
||||||
return ds
|
(MapFunction<T, Tuple2<String, T>>)
|
||||||
.map((MapFunction<T, Tuple2<String, T>>) value -> new Tuple2<>(idFn.get().apply(value), value),
|
value -> new Tuple2<>(idFn.get().apply(value), value),
|
||||||
Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
|
Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,15 +113,17 @@ public class PromoteActionPayloadFunctions {
|
||||||
* @param <G> Type of graph table row
|
* @param <G> Type of graph table row
|
||||||
* @return Dataset of aggregated graph table rows
|
* @return Dataset of aggregated graph table rows
|
||||||
*/
|
*/
|
||||||
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(Dataset<G> rowDS,
|
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(
|
||||||
|
Dataset<G> rowDS,
|
||||||
SerializableSupplier<Function<G, String>> rowIdFn,
|
SerializableSupplier<Function<G, String>> rowIdFn,
|
||||||
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
|
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
|
||||||
SerializableSupplier<G> zeroFn,
|
SerializableSupplier<G> zeroFn,
|
||||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
|
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
|
||||||
Class<G> rowClazz) {
|
Class<G> rowClazz) {
|
||||||
TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
|
TypedColumn<G, G> aggregator =
|
||||||
return rowDS
|
new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
|
||||||
.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
|
return rowDS.groupByKey(
|
||||||
|
(MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
|
||||||
.agg(aggregator)
|
.agg(aggregator)
|
||||||
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
|
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
|
||||||
}
|
}
|
||||||
|
@ -115,7 +139,8 @@ public class PromoteActionPayloadFunctions {
|
||||||
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
|
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
|
||||||
private Class<G> rowClazz;
|
private Class<G> rowClazz;
|
||||||
|
|
||||||
public TableAggregator(SerializableSupplier<G> zeroFn,
|
public TableAggregator(
|
||||||
|
SerializableSupplier<G> zeroFn,
|
||||||
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
|
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
|
||||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
|
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
|
||||||
Class<G> rowClazz) {
|
Class<G> rowClazz) {
|
||||||
|
@ -149,7 +174,8 @@ public class PromoteActionPayloadFunctions {
|
||||||
} else if (!isNotZero.apply(left) && isNotZero.apply(right)) {
|
} else if (!isNotZero.apply(left) && isNotZero.apply(right)) {
|
||||||
return right;
|
return right;
|
||||||
}
|
}
|
||||||
throw new RuntimeException("internal aggregation error: left and right objects are zero");
|
throw new RuntimeException(
|
||||||
|
"internal aggregation error: left and right objects are zero");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,10 +1,20 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.partition;
|
package eu.dnetlib.dhp.actionmanager.partition;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
|
||||||
|
import static org.apache.spark.sql.functions.*;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
||||||
|
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.ISClient;
|
import eu.dnetlib.dhp.actionmanager.ISClient;
|
||||||
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest;
|
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.mapreduce.Job;
|
import org.apache.hadoop.mapreduce.Job;
|
||||||
|
@ -25,32 +35,23 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
import scala.collection.mutable.Seq;
|
import scala.collection.mutable.Seq;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
|
|
||||||
import static org.apache.spark.sql.functions.*;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
|
|
||||||
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
|
|
||||||
|
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class PartitionActionSetsByPayloadTypeJobTest {
|
public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
private static final ClassLoader cl = PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
|
private static final ClassLoader cl =
|
||||||
|
PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
|
||||||
|
|
||||||
private static Configuration configuration;
|
private static Configuration configuration;
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
|
private static final StructType ATOMIC_ACTION_SCHEMA =
|
||||||
|
StructType$.MODULE$.apply(
|
||||||
Arrays.asList(
|
Arrays.asList(
|
||||||
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
|
StructField$.MODULE$.apply(
|
||||||
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())
|
"clazz", DataTypes.StringType, false, Metadata.empty()),
|
||||||
));
|
StructField$.MODULE$.apply(
|
||||||
|
"payload", DataTypes.StringType, false, Metadata.empty())));
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void beforeAll() throws IOException {
|
public static void beforeAll() throws IOException {
|
||||||
|
@ -71,11 +72,11 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
@Nested
|
@Nested
|
||||||
class Main {
|
class Main {
|
||||||
|
|
||||||
@Mock
|
@Mock private ISClient isClient;
|
||||||
private ISClient isClient;
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
||||||
Path outputDir = workingDir.resolve("output");
|
Path outputDir = workingDir.resolve("output");
|
||||||
|
@ -85,15 +86,16 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
List<String> inputActionSetsPaths = resolveInputActionSetPaths(inputActionSetsBaseDir);
|
List<String> inputActionSetsPaths = resolveInputActionSetPaths(inputActionSetsBaseDir);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString())).thenReturn(inputActionSetsPaths);
|
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString()))
|
||||||
|
.thenReturn(inputActionSetsPaths);
|
||||||
|
|
||||||
PartitionActionSetsByPayloadTypeJob job = new PartitionActionSetsByPayloadTypeJob();
|
PartitionActionSetsByPayloadTypeJob job = new PartitionActionSetsByPayloadTypeJob();
|
||||||
job.setIsClient(isClient);
|
job.setIsClient(isClient);
|
||||||
job.run(
|
job.run(
|
||||||
Boolean.FALSE,
|
Boolean.FALSE,
|
||||||
"", // it can be empty we're mocking the response from isClient to resolve the paths
|
"", // it can be empty we're mocking the response from isClient to resolve the
|
||||||
outputDir.toString()
|
// paths
|
||||||
);
|
outputDir.toString());
|
||||||
|
|
||||||
// then
|
// then
|
||||||
Files.exists(outputDir);
|
Files.exists(outputDir);
|
||||||
|
@ -110,45 +112,61 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir) throws IOException {
|
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir)
|
||||||
|
throws IOException {
|
||||||
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
|
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
|
||||||
return Files
|
return Files.list(inputActionSetJsonDumpsDir)
|
||||||
.list(inputActionSetJsonDumpsDir)
|
.map(
|
||||||
.map(path -> {
|
path -> {
|
||||||
String inputActionSetId = path.getFileName().toString();
|
String inputActionSetId = path.getFileName().toString();
|
||||||
return inputActionSetsBaseDir.resolve(inputActionSetId).toString();
|
return inputActionSetsBaseDir.resolve(inputActionSetId).toString();
|
||||||
})
|
})
|
||||||
.collect(Collectors.toCollection(ArrayList::new));
|
.collect(Collectors.toCollection(ArrayList::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir) throws IOException {
|
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir)
|
||||||
|
throws IOException {
|
||||||
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
|
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
|
||||||
|
|
||||||
Map<String, List<String>> oafsByType = new HashMap<>();
|
Map<String, List<String>> oafsByType = new HashMap<>();
|
||||||
Files
|
Files.list(inputActionSetJsonDumpsDir)
|
||||||
.list(inputActionSetJsonDumpsDir)
|
.forEach(
|
||||||
.forEach(inputActionSetJsonDumpFile -> {
|
inputActionSetJsonDumpFile -> {
|
||||||
String inputActionSetId = inputActionSetJsonDumpFile.getFileName().toString();
|
String inputActionSetId =
|
||||||
|
inputActionSetJsonDumpFile.getFileName().toString();
|
||||||
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId);
|
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId);
|
||||||
|
|
||||||
Dataset<String> actionDS = readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
|
Dataset<String> actionDS =
|
||||||
|
readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
|
||||||
.cache();
|
.cache();
|
||||||
|
|
||||||
writeActionsAsJobInput(actionDS, inputActionSetId, inputActionSetDir.toString());
|
writeActionsAsJobInput(
|
||||||
|
actionDS, inputActionSetId, inputActionSetDir.toString());
|
||||||
|
|
||||||
Map<String, List<String>> actionSetOafsByType = actionDS
|
Map<String, List<String>> actionSetOafsByType =
|
||||||
.withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA))
|
actionDS
|
||||||
.select(expr("atomic_action.*"))
|
.withColumn(
|
||||||
.groupBy(col("clazz"))
|
"atomic_action",
|
||||||
|
from_json(col("value"), ATOMIC_ACTION_SCHEMA))
|
||||||
|
.select(expr("atomic_action.*")).groupBy(col("clazz"))
|
||||||
.agg(collect_list(col("payload")).as("payload_list"))
|
.agg(collect_list(col("payload")).as("payload_list"))
|
||||||
.collectAsList()
|
.collectAsList().stream()
|
||||||
.stream()
|
.map(
|
||||||
.map(row -> new AbstractMap.SimpleEntry<>(row.<String>getAs("clazz"),
|
row ->
|
||||||
mutableSeqAsJavaList(row.<Seq<String>>getAs("payload_list"))))
|
new AbstractMap.SimpleEntry<>(
|
||||||
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
|
row.<String>getAs("clazz"),
|
||||||
|
mutableSeqAsJavaList(
|
||||||
|
row.<Seq<String>>getAs(
|
||||||
|
"payload_list"))))
|
||||||
|
.collect(
|
||||||
|
Collectors.toMap(
|
||||||
|
AbstractMap.SimpleEntry::getKey,
|
||||||
|
AbstractMap.SimpleEntry::getValue));
|
||||||
|
|
||||||
actionSetOafsByType.keySet()
|
actionSetOafsByType
|
||||||
.forEach(x -> {
|
.keySet()
|
||||||
|
.forEach(
|
||||||
|
x -> {
|
||||||
if (oafsByType.containsKey(x)) {
|
if (oafsByType.containsKey(x)) {
|
||||||
List<String> collected = new ArrayList<>();
|
List<String> collected = new ArrayList<>();
|
||||||
collected.addAll(oafsByType.get(x));
|
collected.addAll(oafsByType.get(x));
|
||||||
|
@ -164,38 +182,40 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Path getInputActionSetJsonDumpsDir() {
|
private static Path getInputActionSetJsonDumpsDir() {
|
||||||
return Paths
|
return Paths.get(
|
||||||
.get(Objects.requireNonNull(cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
|
Objects.requireNonNull(
|
||||||
|
cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
|
||||||
.getFile());
|
.getFile());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<String> readActionsFromJsonDump(String path) {
|
private static Dataset<String> readActionsFromJsonDump(String path) {
|
||||||
return spark
|
return spark.read().textFile(path);
|
||||||
.read()
|
|
||||||
.textFile(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void writeActionsAsJobInput(Dataset<String> actionDS,
|
private static void writeActionsAsJobInput(
|
||||||
String inputActionSetId,
|
Dataset<String> actionDS, String inputActionSetId, String path) {
|
||||||
String path) {
|
actionDS.javaRDD()
|
||||||
actionDS
|
|
||||||
.javaRDD()
|
|
||||||
.mapToPair(json -> new Tuple2<>(new Text(inputActionSetId), new Text(json)))
|
.mapToPair(json -> new Tuple2<>(new Text(inputActionSetId), new Text(json)))
|
||||||
.saveAsNewAPIHadoopFile(path,
|
.saveAsNewAPIHadoopFile(
|
||||||
|
path,
|
||||||
Text.class,
|
Text.class,
|
||||||
Text.class,
|
Text.class,
|
||||||
SequenceFileOutputFormat.class,
|
SequenceFileOutputFormat.class,
|
||||||
configuration);
|
configuration);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> void assertForOafType(Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
|
private static <T extends Oaf> void assertForOafType(
|
||||||
Path outputDatasetDir = outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
|
Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
|
||||||
|
Path outputDatasetDir =
|
||||||
|
outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
|
||||||
Files.exists(outputDatasetDir);
|
Files.exists(outputDatasetDir);
|
||||||
|
|
||||||
List<T> actuals = readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
|
List<T> actuals =
|
||||||
|
readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
|
||||||
actuals.sort(Comparator.comparingInt(Object::hashCode));
|
actuals.sort(Comparator.comparingInt(Object::hashCode));
|
||||||
|
|
||||||
List<T> expecteds = oafsByClassName.get(clazz.getCanonicalName()).stream()
|
List<T> expecteds =
|
||||||
|
oafsByClassName.get(clazz.getCanonicalName()).stream()
|
||||||
.map(json -> mapToOaf(json, clazz))
|
.map(json -> mapToOaf(json, clazz))
|
||||||
.sorted(Comparator.comparingInt(Object::hashCode))
|
.sorted(Comparator.comparingInt(Object::hashCode))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -203,19 +223,23 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
assertIterableEquals(expecteds, actuals);
|
assertIterableEquals(expecteds, actuals);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(String path,
|
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(
|
||||||
Class<T> clazz) {
|
String path, Class<T> clazz) {
|
||||||
return spark
|
return spark.read()
|
||||||
.read()
|
|
||||||
.parquet(path)
|
.parquet(path)
|
||||||
.map((MapFunction<Row, T>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"), clazz),
|
.map(
|
||||||
|
(MapFunction<Row, T>)
|
||||||
|
value ->
|
||||||
|
OBJECT_MAPPER.readValue(
|
||||||
|
value.<String>getAs("payload"), clazz),
|
||||||
Encoders.bean(clazz));
|
Encoders.bean(clazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> T mapToOaf(String json, Class<T> clazz) {
|
private static <T extends Oaf> T mapToOaf(String json, Class<T> clazz) {
|
||||||
return rethrowAsRuntimeException(
|
return rethrowAsRuntimeException(
|
||||||
() -> OBJECT_MAPPER.readValue(json, clazz),
|
() -> OBJECT_MAPPER.readValue(json, clazz),
|
||||||
String.format("failed to map json to class: json=%s, class=%s", json, clazz.getCanonicalName())
|
String.format(
|
||||||
);
|
"failed to map json to class: json=%s, class=%s",
|
||||||
|
json, clazz.getCanonicalName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,17 +1,16 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import org.junit.jupiter.api.Nested;
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
import java.util.function.BiFunction;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.Strategy;
|
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.Strategy;
|
||||||
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.functionFor;
|
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.functionFor;
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
import static org.mockito.Mockito.*;
|
import static org.mockito.Mockito.*;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import org.junit.jupiter.api.Nested;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class MergeAndGetTest {
|
public class MergeAndGetTest {
|
||||||
|
|
||||||
@Nested
|
@Nested
|
||||||
|
@ -24,11 +23,11 @@ public class MergeAndGetTest {
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -38,11 +37,11 @@ public class MergeAndGetTest {
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -52,11 +51,11 @@ public class MergeAndGetTest {
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -66,11 +65,11 @@ public class MergeAndGetTest {
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -80,11 +79,11 @@ public class MergeAndGetTest {
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -94,7 +93,8 @@ public class MergeAndGetTest {
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
Oaf x = fn.get().apply(a, b);
|
Oaf x = fn.get().apply(a, b);
|
||||||
|
@ -110,11 +110,11 @@ public class MergeAndGetTest {
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -124,30 +124,28 @@ public class MergeAndGetTest {
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
||||||
// given
|
// given
|
||||||
class OafEntitySub1 extends OafEntity {
|
class OafEntitySub1 extends OafEntity {}
|
||||||
}
|
class OafEntitySub2 extends OafEntity {}
|
||||||
class OafEntitySub2 extends OafEntity {
|
|
||||||
}
|
|
||||||
|
|
||||||
OafEntitySub1 a = mock(OafEntitySub1.class);
|
OafEntitySub1 a = mock(OafEntitySub1.class);
|
||||||
OafEntitySub2 b = mock(OafEntitySub2.class);
|
OafEntitySub2 b = mock(OafEntitySub2.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -157,7 +155,8 @@ public class MergeAndGetTest {
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.MERGE_FROM_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
Oaf x = fn.get().apply(a, b);
|
Oaf x = fn.get().apply(a, b);
|
||||||
|
@ -177,11 +176,11 @@ public class MergeAndGetTest {
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.SELECT_NEWER_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -191,11 +190,11 @@ public class MergeAndGetTest {
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.SELECT_NEWER_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -205,28 +204,29 @@ public class MergeAndGetTest {
|
||||||
Result b = mock(Result.class);
|
Result b = mock(Result.class);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.SELECT_NEWER_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
// real types must be used because subclass-superclass resolution does not work for mocks
|
// real types must be used because subclass-superclass resolution does not work for
|
||||||
|
// mocks
|
||||||
Dataset a = new Dataset();
|
Dataset a = new Dataset();
|
||||||
a.setLastupdatetimestamp(1L);
|
a.setLastupdatetimestamp(1L);
|
||||||
Result b = new Result();
|
Result b = new Result();
|
||||||
b.setLastupdatetimestamp(2L);
|
b.setLastupdatetimestamp(2L);
|
||||||
|
|
||||||
// when
|
// when
|
||||||
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
|
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
|
||||||
|
functionFor(Strategy.SELECT_NEWER_AND_GET);
|
||||||
|
|
||||||
// then
|
// then
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
|
||||||
fn.get().apply(a, b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -1,8 +1,20 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
import static org.junit.jupiter.params.provider.Arguments.arguments;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
@ -14,21 +26,9 @@ import org.junit.jupiter.params.ParameterizedTest;
|
||||||
import org.junit.jupiter.params.provider.Arguments;
|
import org.junit.jupiter.params.provider.Arguments;
|
||||||
import org.junit.jupiter.params.provider.MethodSource;
|
import org.junit.jupiter.params.provider.MethodSource;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.nio.file.Path;
|
|
||||||
import java.nio.file.Paths;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
import static org.junit.jupiter.params.provider.Arguments.arguments;
|
|
||||||
|
|
||||||
public class PromoteActionPayloadForGraphTableJobTest {
|
public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
|
private static final ClassLoader cl =
|
||||||
|
PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
|
@ -52,7 +52,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void beforeEach() throws IOException {
|
public void beforeEach() throws IOException {
|
||||||
workingDir = Files.createTempDirectory(PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
|
workingDir =
|
||||||
|
Files.createTempDirectory(
|
||||||
|
PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
|
||||||
inputDir = workingDir.resolve("input");
|
inputDir = workingDir.resolve("input");
|
||||||
inputGraphRootDir = inputDir.resolve("graph");
|
inputGraphRootDir = inputDir.resolve("graph");
|
||||||
inputActionPayloadRootDir = inputDir.resolve("action_payload");
|
inputActionPayloadRootDir = inputDir.resolve("action_payload");
|
||||||
|
@ -80,35 +82,50 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
||||||
|
|
||||||
// when
|
// when
|
||||||
RuntimeException exception = assertThrows(RuntimeException.class, () ->
|
RuntimeException exception =
|
||||||
PromoteActionPayloadForGraphTableJob.main(new String[]{
|
assertThrows(
|
||||||
|
RuntimeException.class,
|
||||||
|
() ->
|
||||||
|
PromoteActionPayloadForGraphTableJob.main(
|
||||||
|
new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-inputGraphTablePath", "",
|
"-inputGraphTablePath", "",
|
||||||
"-graphTableClassName", rowClazz.getCanonicalName(),
|
"-graphTableClassName", rowClazz.getCanonicalName(),
|
||||||
"-inputActionPayloadPath", "",
|
"-inputActionPayloadPath", "",
|
||||||
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(),
|
"-actionPayloadClassName",
|
||||||
|
actionPayloadClazz.getCanonicalName(),
|
||||||
"-outputGraphTablePath", "",
|
"-outputGraphTablePath", "",
|
||||||
"-mergeAndGetStrategy", MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name()
|
"-mergeAndGetStrategy",
|
||||||
|
MergeAndGet.Strategy.SELECT_NEWER_AND_GET
|
||||||
|
.name()
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// then
|
// then
|
||||||
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s",
|
String msg =
|
||||||
|
String.format(
|
||||||
|
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
|
||||||
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
|
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
|
||||||
assertTrue(exception.getMessage().contains(msg));
|
assertTrue(exception.getMessage().contains(msg));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
||||||
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
@MethodSource(
|
||||||
public void shouldPromoteActionPayloadForGraphTable(MergeAndGet.Strategy strategy,
|
"eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
||||||
|
public void shouldPromoteActionPayloadForGraphTable(
|
||||||
|
MergeAndGet.Strategy strategy,
|
||||||
Class<? extends Oaf> rowClazz,
|
Class<? extends Oaf> rowClazz,
|
||||||
Class<? extends Oaf> actionPayloadClazz) throws Exception {
|
Class<? extends Oaf> actionPayloadClazz)
|
||||||
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
|
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
|
||||||
Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
|
Path inputActionPayloadDir =
|
||||||
Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
|
createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
|
||||||
|
Path outputGraphTableDir =
|
||||||
|
outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
|
||||||
|
|
||||||
// when
|
// when
|
||||||
PromoteActionPayloadForGraphTableJob.main(new String[]{
|
PromoteActionPayloadForGraphTableJob.main(
|
||||||
|
new String[] {
|
||||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
"-inputGraphTablePath", inputGraphTableDir.toString(),
|
"-inputGraphTablePath", inputGraphTableDir.toString(),
|
||||||
"-graphTableClassName", rowClazz.getCanonicalName(),
|
"-graphTableClassName", rowClazz.getCanonicalName(),
|
||||||
|
@ -121,17 +138,22 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
// then
|
// then
|
||||||
assertTrue(Files.exists(outputGraphTableDir));
|
assertTrue(Files.exists(outputGraphTableDir));
|
||||||
|
|
||||||
List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
|
List<? extends Oaf> actualOutputRows =
|
||||||
.collectAsList()
|
readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
|
||||||
.stream()
|
.collectAsList().stream()
|
||||||
.sorted(Comparator.comparingInt(Object::hashCode))
|
.sorted(Comparator.comparingInt(Object::hashCode))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
String expectedOutputGraphTableJsonDumpPath = resultFileLocation(strategy, rowClazz, actionPayloadClazz);
|
String expectedOutputGraphTableJsonDumpPath =
|
||||||
Path expectedOutputGraphTableJsonDumpFile = Paths
|
resultFileLocation(strategy, rowClazz, actionPayloadClazz);
|
||||||
.get(Objects.requireNonNull(cl.getResource(expectedOutputGraphTableJsonDumpPath)).getFile());
|
Path expectedOutputGraphTableJsonDumpFile =
|
||||||
List<? extends Oaf> expectedOutputRows = readGraphTableFromJsonDump(expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
|
Paths.get(
|
||||||
.collectAsList()
|
Objects.requireNonNull(
|
||||||
.stream()
|
cl.getResource(expectedOutputGraphTableJsonDumpPath))
|
||||||
|
.getFile());
|
||||||
|
List<? extends Oaf> expectedOutputRows =
|
||||||
|
readGraphTableFromJsonDump(
|
||||||
|
expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
|
||||||
|
.collectAsList().stream()
|
||||||
.sorted(Comparator.comparingInt(Object::hashCode))
|
.sorted(Comparator.comparingInt(Object::hashCode))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
assertIterableEquals(expectedOutputRows, actualOutputRows);
|
assertIterableEquals(expectedOutputRows, actualOutputRows);
|
||||||
|
@ -140,27 +162,50 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
|
|
||||||
public static Stream<Arguments> promoteJobTestParams() {
|
public static Stream<Arguments> promoteJobTestParams() {
|
||||||
return Stream.of(
|
return Stream.of(
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Dataset.class),
|
arguments(
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Result.class),
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Datasource.class, Datasource.class),
|
eu.dnetlib.dhp.schema.oaf.Dataset.class,
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Organization.class, Organization.class),
|
eu.dnetlib.dhp.schema.oaf.Dataset.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, OtherResearchProduct.class),
|
arguments(
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, Result.class),
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Dataset.class,
|
||||||
|
eu.dnetlib.dhp.schema.oaf.Result.class),
|
||||||
|
arguments(
|
||||||
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
|
Datasource.class,
|
||||||
|
Datasource.class),
|
||||||
|
arguments(
|
||||||
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
|
Organization.class,
|
||||||
|
Organization.class),
|
||||||
|
arguments(
|
||||||
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
|
OtherResearchProduct.class,
|
||||||
|
OtherResearchProduct.class),
|
||||||
|
arguments(
|
||||||
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
|
OtherResearchProduct.class,
|
||||||
|
Result.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Project.class, Project.class),
|
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Project.class, Project.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Publication.class),
|
arguments(
|
||||||
|
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
|
||||||
|
Publication.class,
|
||||||
|
Publication.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Result.class),
|
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Result.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Relation.class, Relation.class),
|
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Relation.class, Relation.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Software.class),
|
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Software.class),
|
||||||
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class)
|
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class));
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf> Path createGraphTable(Path inputGraphRootDir,
|
private static <G extends Oaf> Path createGraphTable(
|
||||||
Class<G> rowClazz) {
|
Path inputGraphRootDir, Class<G> rowClazz) {
|
||||||
String inputGraphTableJsonDumpPath = inputGraphTableJsonDumpLocation(rowClazz);
|
String inputGraphTableJsonDumpPath = inputGraphTableJsonDumpLocation(rowClazz);
|
||||||
Path inputGraphTableJsonDumpFile = Paths
|
Path inputGraphTableJsonDumpFile =
|
||||||
.get(Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath)).getFile());
|
Paths.get(
|
||||||
Dataset<G> rowDS = readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
|
Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath))
|
||||||
|
.getFile());
|
||||||
|
Dataset<G> rowDS =
|
||||||
|
readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
|
||||||
String inputGraphTableName = rowClazz.getSimpleName().toLowerCase();
|
String inputGraphTableName = rowClazz.getSimpleName().toLowerCase();
|
||||||
Path inputGraphTableDir = inputGraphRootDir.resolve(inputGraphTableName);
|
Path inputGraphTableDir = inputGraphRootDir.resolve(inputGraphTableName);
|
||||||
writeGraphTableAaJobInput(rowDS, inputGraphTableDir.toString());
|
writeGraphTableAaJobInput(rowDS, inputGraphTableDir.toString());
|
||||||
|
@ -169,71 +214,74 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
|
|
||||||
private static String inputGraphTableJsonDumpLocation(Class<? extends Oaf> rowClazz) {
|
private static String inputGraphTableJsonDumpLocation(Class<? extends Oaf> rowClazz) {
|
||||||
return String.format(
|
return String.format(
|
||||||
"%s/%s.json", "eu/dnetlib/dhp/actionmanager/promote/input/graph", rowClazz.getSimpleName().toLowerCase());
|
"%s/%s.json",
|
||||||
|
"eu/dnetlib/dhp/actionmanager/promote/input/graph",
|
||||||
|
rowClazz.getSimpleName().toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(String path,
|
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(
|
||||||
Class<G> rowClazz) {
|
String path, Class<G> rowClazz) {
|
||||||
return spark
|
return spark.read()
|
||||||
.read()
|
|
||||||
.textFile(path)
|
.textFile(path)
|
||||||
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz));
|
.map(
|
||||||
|
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
|
||||||
|
Encoders.bean(rowClazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS,
|
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS, String path) {
|
||||||
String path) {
|
rowDS.write().option("compression", "gzip").json(path);
|
||||||
rowDS
|
|
||||||
.write()
|
|
||||||
.option("compression", "gzip")
|
|
||||||
.json(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf, A extends Oaf> Path createActionPayload(Path inputActionPayloadRootDir,
|
private static <G extends Oaf, A extends Oaf> Path createActionPayload(
|
||||||
Class<G> rowClazz,
|
Path inputActionPayloadRootDir, Class<G> rowClazz, Class<A> actionPayloadClazz) {
|
||||||
Class<A> actionPayloadClazz) {
|
String inputActionPayloadJsonDumpPath =
|
||||||
String inputActionPayloadJsonDumpPath = inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
|
inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
|
||||||
Path inputActionPayloadJsonDumpFile = Paths
|
Path inputActionPayloadJsonDumpFile =
|
||||||
.get(Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath)).getFile());
|
Paths.get(
|
||||||
Dataset<String> actionPayloadDS = readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
|
Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath))
|
||||||
Path inputActionPayloadDir = inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
|
.getFile());
|
||||||
|
Dataset<String> actionPayloadDS =
|
||||||
|
readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
|
||||||
|
Path inputActionPayloadDir =
|
||||||
|
inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
|
||||||
writeActionPayloadAsJobInput(actionPayloadDS, inputActionPayloadDir.toString());
|
writeActionPayloadAsJobInput(actionPayloadDS, inputActionPayloadDir.toString());
|
||||||
return inputActionPayloadDir;
|
return inputActionPayloadDir;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String inputActionPayloadJsonDumpLocation(Class<? extends Oaf> rowClazz,
|
private static String inputActionPayloadJsonDumpLocation(
|
||||||
Class<? extends Oaf> actionPayloadClazz) {
|
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
|
||||||
|
|
||||||
return String.format("eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
|
return String.format(
|
||||||
rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase());
|
"eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
|
||||||
|
rowClazz.getSimpleName().toLowerCase(),
|
||||||
|
actionPayloadClazz.getSimpleName().toLowerCase());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Dataset<String> readActionPayloadFromJsonDump(String path) {
|
private static Dataset<String> readActionPayloadFromJsonDump(String path) {
|
||||||
return spark
|
return spark.read().textFile(path);
|
||||||
.read()
|
|
||||||
.textFile(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS,
|
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS, String path) {
|
||||||
String path) {
|
actionPayloadDS.withColumnRenamed("value", "payload").write().parquet(path);
|
||||||
actionPayloadDS
|
|
||||||
.withColumnRenamed("value", "payload")
|
|
||||||
.write()
|
|
||||||
.parquet(path);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(String path,
|
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(
|
||||||
Class<G> rowClazz) {
|
String path, Class<G> rowClazz) {
|
||||||
return spark
|
return spark.read()
|
||||||
.read()
|
|
||||||
.textFile(path)
|
.textFile(path)
|
||||||
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz));
|
.map(
|
||||||
|
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
|
||||||
|
Encoders.bean(rowClazz));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String resultFileLocation(MergeAndGet.Strategy strategy,
|
private static String resultFileLocation(
|
||||||
|
MergeAndGet.Strategy strategy,
|
||||||
Class<? extends Oaf> rowClazz,
|
Class<? extends Oaf> rowClazz,
|
||||||
Class<? extends Oaf> actionPayloadClazz) {
|
Class<? extends Oaf> actionPayloadClazz) {
|
||||||
return String
|
return String.format(
|
||||||
.format("eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
|
"eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
|
||||||
strategy.name().toLowerCase(), rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase());
|
strategy.name().toLowerCase(),
|
||||||
|
rowClazz.getSimpleName().toLowerCase(),
|
||||||
|
actionPayloadClazz.getSimpleName().toLowerCase());
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -1,7 +1,15 @@
|
||||||
package eu.dnetlib.dhp.actionmanager.promote;
|
package eu.dnetlib.dhp.actionmanager.promote;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Function;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
@ -11,15 +19,6 @@ import org.junit.jupiter.api.BeforeAll;
|
||||||
import org.junit.jupiter.api.Nested;
|
import org.junit.jupiter.api.Nested;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.function.BiFunction;
|
|
||||||
import java.util.function.Function;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
|
||||||
|
|
||||||
public class PromoteActionPayloadFunctionsTest {
|
public class PromoteActionPayloadFunctionsTest {
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
@ -44,20 +43,20 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
||||||
// given
|
// given
|
||||||
class OafImpl extends Oaf {
|
class OafImpl extends Oaf {}
|
||||||
}
|
|
||||||
|
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () ->
|
assertThrows(
|
||||||
PromoteActionPayloadFunctions
|
RuntimeException.class,
|
||||||
.joinGraphTableWithActionPayloadAndMerge(null,
|
() ->
|
||||||
|
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
|
||||||
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
OafImplSubSub.class,
|
OafImplSubSub.class,
|
||||||
OafImpl.class
|
OafImpl.class));
|
||||||
));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -68,39 +67,52 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
String id2 = "id2";
|
String id2 = "id2";
|
||||||
String id3 = "id3";
|
String id3 = "id3";
|
||||||
String id4 = "id4";
|
String id4 = "id4";
|
||||||
List<OafImplSubSub> rowData = Arrays.asList(
|
List<OafImplSubSub> rowData =
|
||||||
|
Arrays.asList(
|
||||||
createOafImplSubSub(id0),
|
createOafImplSubSub(id0),
|
||||||
createOafImplSubSub(id1),
|
createOafImplSubSub(id1),
|
||||||
createOafImplSubSub(id2),
|
createOafImplSubSub(id2),
|
||||||
createOafImplSubSub(id3)
|
createOafImplSubSub(id3));
|
||||||
);
|
Dataset<OafImplSubSub> rowDS =
|
||||||
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
|
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
|
||||||
|
|
||||||
List<OafImplSubSub> actionPayloadData = Arrays.asList(
|
List<OafImplSubSub> actionPayloadData =
|
||||||
|
Arrays.asList(
|
||||||
createOafImplSubSub(id1),
|
createOafImplSubSub(id1),
|
||||||
createOafImplSubSub(id2), createOafImplSubSub(id2),
|
createOafImplSubSub(id2),
|
||||||
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3),
|
createOafImplSubSub(id2),
|
||||||
createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4)
|
createOafImplSubSub(id3),
|
||||||
);
|
createOafImplSubSub(id3),
|
||||||
Dataset<OafImplSubSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
|
createOafImplSubSub(id3),
|
||||||
|
createOafImplSubSub(id4),
|
||||||
|
createOafImplSubSub(id4),
|
||||||
|
createOafImplSubSub(id4),
|
||||||
|
createOafImplSubSub(id4));
|
||||||
|
Dataset<OafImplSubSub> actionPayloadDS =
|
||||||
|
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
|
||||||
|
|
||||||
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
|
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
|
||||||
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId;
|
() -> OafImplRoot::getId;
|
||||||
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
|
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn =
|
||||||
|
() -> OafImplRoot::getId;
|
||||||
|
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
|
||||||
|
mergeAndGetFn =
|
||||||
|
() ->
|
||||||
|
(x, y) -> {
|
||||||
x.merge(y);
|
x.merge(y);
|
||||||
return x;
|
return x;
|
||||||
};
|
};
|
||||||
|
|
||||||
// when
|
// when
|
||||||
List<OafImplSubSub> results = PromoteActionPayloadFunctions
|
List<OafImplSubSub> results =
|
||||||
.joinGraphTableWithActionPayloadAndMerge(rowDS,
|
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
|
||||||
|
rowDS,
|
||||||
actionPayloadDS,
|
actionPayloadDS,
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
actionPayloadIdFn,
|
actionPayloadIdFn,
|
||||||
mergeAndGetFn,
|
mergeAndGetFn,
|
||||||
OafImplSubSub.class,
|
OafImplSubSub.class,
|
||||||
OafImplSubSub.class
|
OafImplSubSub.class)
|
||||||
)
|
|
||||||
.collectAsList();
|
.collectAsList();
|
||||||
|
|
||||||
// then
|
// then
|
||||||
|
@ -111,7 +123,8 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
|
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
|
||||||
assertEquals(4, results.stream().filter(x -> x.getId().equals(id4)).count());
|
assertEquals(4, results.stream().filter(x -> x.getId().equals(id4)).count());
|
||||||
|
|
||||||
results.forEach(result -> {
|
results.forEach(
|
||||||
|
result -> {
|
||||||
switch (result.getId()) {
|
switch (result.getId()) {
|
||||||
case "id0":
|
case "id0":
|
||||||
assertEquals(1, result.getMerged());
|
assertEquals(1, result.getMerged());
|
||||||
|
@ -138,39 +151,52 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
String id2 = "id2";
|
String id2 = "id2";
|
||||||
String id3 = "id3";
|
String id3 = "id3";
|
||||||
String id4 = "id4";
|
String id4 = "id4";
|
||||||
List<OafImplSubSub> rowData = Arrays.asList(
|
List<OafImplSubSub> rowData =
|
||||||
|
Arrays.asList(
|
||||||
createOafImplSubSub(id0),
|
createOafImplSubSub(id0),
|
||||||
createOafImplSubSub(id1),
|
createOafImplSubSub(id1),
|
||||||
createOafImplSubSub(id2),
|
createOafImplSubSub(id2),
|
||||||
createOafImplSubSub(id3)
|
createOafImplSubSub(id3));
|
||||||
);
|
Dataset<OafImplSubSub> rowDS =
|
||||||
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
|
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
|
||||||
|
|
||||||
List<OafImplSub> actionPayloadData = Arrays.asList(
|
List<OafImplSub> actionPayloadData =
|
||||||
|
Arrays.asList(
|
||||||
createOafImplSub(id1),
|
createOafImplSub(id1),
|
||||||
createOafImplSub(id2), createOafImplSub(id2),
|
createOafImplSub(id2),
|
||||||
createOafImplSub(id3), createOafImplSub(id3), createOafImplSub(id3),
|
createOafImplSub(id2),
|
||||||
createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4)
|
createOafImplSub(id3),
|
||||||
);
|
createOafImplSub(id3),
|
||||||
Dataset<OafImplSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
|
createOafImplSub(id3),
|
||||||
|
createOafImplSub(id4),
|
||||||
|
createOafImplSub(id4),
|
||||||
|
createOafImplSub(id4),
|
||||||
|
createOafImplSub(id4));
|
||||||
|
Dataset<OafImplSub> actionPayloadDS =
|
||||||
|
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
|
||||||
|
|
||||||
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
|
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
|
||||||
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId;
|
() -> OafImplRoot::getId;
|
||||||
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
|
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn =
|
||||||
|
() -> OafImplRoot::getId;
|
||||||
|
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>>
|
||||||
|
mergeAndGetFn =
|
||||||
|
() ->
|
||||||
|
(x, y) -> {
|
||||||
x.merge(y);
|
x.merge(y);
|
||||||
return x;
|
return x;
|
||||||
};
|
};
|
||||||
|
|
||||||
// when
|
// when
|
||||||
List<OafImplSubSub> results = PromoteActionPayloadFunctions
|
List<OafImplSubSub> results =
|
||||||
.joinGraphTableWithActionPayloadAndMerge(rowDS,
|
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
|
||||||
|
rowDS,
|
||||||
actionPayloadDS,
|
actionPayloadDS,
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
actionPayloadIdFn,
|
actionPayloadIdFn,
|
||||||
mergeAndGetFn,
|
mergeAndGetFn,
|
||||||
OafImplSubSub.class,
|
OafImplSubSub.class,
|
||||||
OafImplSub.class
|
OafImplSub.class)
|
||||||
)
|
|
||||||
.collectAsList();
|
.collectAsList();
|
||||||
|
|
||||||
// then
|
// then
|
||||||
|
@ -181,7 +207,8 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
|
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
|
||||||
assertEquals(0, results.stream().filter(x -> x.getId().equals(id4)).count());
|
assertEquals(0, results.stream().filter(x -> x.getId().equals(id4)).count());
|
||||||
|
|
||||||
results.forEach(result -> {
|
results.forEach(
|
||||||
|
result -> {
|
||||||
switch (result.getId()) {
|
switch (result.getId()) {
|
||||||
case "id0":
|
case "id0":
|
||||||
assertEquals(1, result.getMerged());
|
assertEquals(1, result.getMerged());
|
||||||
|
@ -196,7 +223,6 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nested
|
@Nested
|
||||||
|
@ -208,24 +234,34 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
String id2 = "id2";
|
String id2 = "id2";
|
||||||
String id3 = "id3";
|
String id3 = "id3";
|
||||||
List<OafImplSubSub> rowData = Arrays.asList(
|
List<OafImplSubSub> rowData =
|
||||||
|
Arrays.asList(
|
||||||
createOafImplSubSub(id1),
|
createOafImplSubSub(id1),
|
||||||
createOafImplSubSub(id2), createOafImplSubSub(id2),
|
createOafImplSubSub(id2),
|
||||||
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3)
|
createOafImplSubSub(id2),
|
||||||
);
|
createOafImplSubSub(id3),
|
||||||
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
|
createOafImplSubSub(id3),
|
||||||
|
createOafImplSubSub(id3));
|
||||||
|
Dataset<OafImplSubSub> rowDS =
|
||||||
|
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
|
||||||
|
|
||||||
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
|
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
|
||||||
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
|
() -> OafImplRoot::getId;
|
||||||
|
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
|
||||||
|
mergeAndGetFn =
|
||||||
|
() ->
|
||||||
|
(x, y) -> {
|
||||||
x.merge(y);
|
x.merge(y);
|
||||||
return x;
|
return x;
|
||||||
};
|
};
|
||||||
SerializableSupplier<OafImplSubSub> zeroFn = OafImplSubSub::new;
|
SerializableSupplier<OafImplSubSub> zeroFn = OafImplSubSub::new;
|
||||||
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn = () -> x -> Objects.nonNull(x.getId());
|
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn =
|
||||||
|
() -> x -> Objects.nonNull(x.getId());
|
||||||
|
|
||||||
// when
|
// when
|
||||||
List<OafImplSubSub> results = PromoteActionPayloadFunctions
|
List<OafImplSubSub> results =
|
||||||
.groupGraphTableByIdAndMerge(rowDS,
|
PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
|
||||||
|
rowDS,
|
||||||
rowIdFn,
|
rowIdFn,
|
||||||
mergeAndGetFn,
|
mergeAndGetFn,
|
||||||
zeroFn,
|
zeroFn,
|
||||||
|
@ -239,7 +275,8 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
assertEquals(1, results.stream().filter(x -> x.getId().equals(id2)).count());
|
assertEquals(1, results.stream().filter(x -> x.getId().equals(id2)).count());
|
||||||
assertEquals(1, results.stream().filter(x -> x.getId().equals(id3)).count());
|
assertEquals(1, results.stream().filter(x -> x.getId().equals(id3)).count());
|
||||||
|
|
||||||
results.forEach(result -> {
|
results.forEach(
|
||||||
|
result -> {
|
||||||
switch (result.getId()) {
|
switch (result.getId()) {
|
||||||
case "id1":
|
case "id1":
|
||||||
assertEquals(1, result.getMerged());
|
assertEquals(1, result.getMerged());
|
||||||
|
@ -255,7 +292,6 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class OafImplRoot extends Oaf {
|
public static class OafImplRoot extends Oaf {
|
||||||
|
@ -310,5 +346,4 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
x.setId(id);
|
x.setId(id);
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,11 @@ import eu.dnetlib.dhp.model.mdstore.Provenance;
|
||||||
import eu.dnetlib.message.Message;
|
import eu.dnetlib.message.Message;
|
||||||
import eu.dnetlib.message.MessageManager;
|
import eu.dnetlib.message.MessageManager;
|
||||||
import eu.dnetlib.message.MessageType;
|
import eu.dnetlib.message.MessageType;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.commons.cli.*;
|
import org.apache.commons.cli.*;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
@ -24,49 +29,52 @@ import org.dom4j.Document;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
public class GenerateNativeStoreSparkJob {
|
public class GenerateNativeStoreSparkJob {
|
||||||
|
|
||||||
|
public static MetadataRecord parseRecord(
|
||||||
|
final String input,
|
||||||
|
final String xpath,
|
||||||
|
final String encoding,
|
||||||
|
final Provenance provenance,
|
||||||
|
final Long dateOfCollection,
|
||||||
|
final LongAccumulator totalItems,
|
||||||
|
final LongAccumulator invalidRecords) {
|
||||||
|
|
||||||
public static MetadataRecord parseRecord (final String input, final String xpath, final String encoding, final Provenance provenance, final Long dateOfCollection, final LongAccumulator totalItems, final LongAccumulator invalidRecords) {
|
if (totalItems != null) totalItems.add(1);
|
||||||
|
|
||||||
if(totalItems != null)
|
|
||||||
totalItems.add(1);
|
|
||||||
try {
|
try {
|
||||||
SAXReader reader = new SAXReader();
|
SAXReader reader = new SAXReader();
|
||||||
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
|
Document document =
|
||||||
|
reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
|
||||||
Node node = document.selectSingleNode(xpath);
|
Node node = document.selectSingleNode(xpath);
|
||||||
final String originalIdentifier = node.getText();
|
final String originalIdentifier = node.getText();
|
||||||
if (StringUtils.isBlank(originalIdentifier)) {
|
if (StringUtils.isBlank(originalIdentifier)) {
|
||||||
if (invalidRecords!= null)
|
if (invalidRecords != null) invalidRecords.add(1);
|
||||||
invalidRecords.add(1);
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection);
|
return new MetadataRecord(
|
||||||
|
originalIdentifier, encoding, provenance, input, dateOfCollection);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (invalidRecords!= null)
|
if (invalidRecords != null) invalidRecords.add(1);
|
||||||
invalidRecords.add(1);
|
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(GenerateNativeStoreSparkJob.class.getResourceAsStream("/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
|
final ArgumentApplicationParser parser =
|
||||||
|
new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(
|
||||||
|
GenerateNativeStoreSparkJob.class.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
final ObjectMapper jsonMapper = new ObjectMapper();
|
final ObjectMapper jsonMapper = new ObjectMapper();
|
||||||
final Provenance provenance = jsonMapper.readValue(parser.get("provenance"), Provenance.class);
|
final Provenance provenance =
|
||||||
|
jsonMapper.readValue(parser.get("provenance"), Provenance.class);
|
||||||
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
|
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
|
||||||
|
|
||||||
final SparkSession spark = SparkSession
|
final SparkSession spark =
|
||||||
.builder()
|
SparkSession.builder()
|
||||||
.appName("GenerateNativeStoreSparkJob")
|
.appName("GenerateNativeStoreSparkJob")
|
||||||
.master(parser.get("master"))
|
.master(parser.get("master"))
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
|
@ -74,44 +82,82 @@ public class GenerateNativeStoreSparkJob {
|
||||||
final Map<String, String> ongoingMap = new HashMap<>();
|
final Map<String, String> ongoingMap = new HashMap<>();
|
||||||
final Map<String, String> reportMap = new HashMap<>();
|
final Map<String, String> reportMap = new HashMap<>();
|
||||||
|
|
||||||
final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
|
final boolean test =
|
||||||
|
parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
final JavaPairRDD<IntWritable, Text> inputRDD = sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
|
final JavaPairRDD<IntWritable, Text> inputRDD =
|
||||||
|
sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
|
||||||
|
|
||||||
final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
|
final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
|
||||||
|
|
||||||
final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
|
final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
|
||||||
|
|
||||||
final MessageManager manager = new MessageManager(parser.get("rabbitHost"), parser.get("rabbitUser"), parser.get("rabbitPassword"), false, false, null);
|
final MessageManager manager =
|
||||||
|
new MessageManager(
|
||||||
|
parser.get("rabbitHost"),
|
||||||
|
parser.get("rabbitUser"),
|
||||||
|
parser.get("rabbitPassword"),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
null);
|
||||||
|
|
||||||
final JavaRDD<MetadataRecord> mappeRDD = inputRDD.map(item -> parseRecord(item._2().toString(), parser.get("xpath"), parser.get("encoding"), provenance, dateOfCollection, totalItems, invalidRecords))
|
final JavaRDD<MetadataRecord> mappeRDD =
|
||||||
.filter(Objects::nonNull).distinct();
|
inputRDD.map(
|
||||||
|
item ->
|
||||||
|
parseRecord(
|
||||||
|
item._2().toString(),
|
||||||
|
parser.get("xpath"),
|
||||||
|
parser.get("encoding"),
|
||||||
|
provenance,
|
||||||
|
dateOfCollection,
|
||||||
|
totalItems,
|
||||||
|
invalidRecords))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.distinct();
|
||||||
|
|
||||||
ongoingMap.put("ongoing", "0");
|
ongoingMap.put("ongoing", "0");
|
||||||
if (!test) {
|
if (!test) {
|
||||||
manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false);
|
manager.sendMessage(
|
||||||
|
new Message(
|
||||||
|
parser.get("workflowId"),
|
||||||
|
"DataFrameCreation",
|
||||||
|
MessageType.ONGOING,
|
||||||
|
ongoingMap),
|
||||||
|
parser.get("rabbitOngoingQueue"),
|
||||||
|
true,
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
||||||
final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
|
final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
|
||||||
final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
|
final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
|
||||||
mdStoreRecords.add(mdstore.count());
|
mdStoreRecords.add(mdstore.count());
|
||||||
ongoingMap.put("ongoing", "" + totalItems.value());
|
ongoingMap.put("ongoing", "" + totalItems.value());
|
||||||
if (!test) {
|
if (!test) {
|
||||||
manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false);
|
manager.sendMessage(
|
||||||
|
new Message(
|
||||||
|
parser.get("workflowId"),
|
||||||
|
"DataFrameCreation",
|
||||||
|
MessageType.ONGOING,
|
||||||
|
ongoingMap),
|
||||||
|
parser.get("rabbitOngoingQueue"),
|
||||||
|
true,
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
mdstore.write().format("parquet").save(parser.get("output"));
|
mdstore.write().format("parquet").save(parser.get("output"));
|
||||||
reportMap.put("inputItem", "" + totalItems.value());
|
reportMap.put("inputItem", "" + totalItems.value());
|
||||||
reportMap.put("invalidRecords", "" + invalidRecords.value());
|
reportMap.put("invalidRecords", "" + invalidRecords.value());
|
||||||
reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
|
reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
|
||||||
if (!test) {
|
if (!test) {
|
||||||
manager.sendMessage(new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), parser.get("rabbitReportQueue"), true, false);
|
manager.sendMessage(
|
||||||
|
new Message(
|
||||||
|
parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
|
||||||
|
parser.get("rabbitReportQueue"),
|
||||||
|
true,
|
||||||
|
false);
|
||||||
manager.close();
|
manager.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin;
|
||||||
|
|
||||||
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
||||||
|
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public interface CollectorPlugin {
|
public interface CollectorPlugin {
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
package eu.dnetlib.dhp.collection.plugin.oai;
|
package eu.dnetlib.dhp.collection.plugin.oai;
|
||||||
|
|
||||||
|
import com.google.common.base.Splitter;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
||||||
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
|
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -7,15 +13,6 @@ import java.util.Spliterator;
|
||||||
import java.util.Spliterators;
|
import java.util.Spliterators;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
import com.google.common.base.Splitter;
|
|
||||||
import com.google.common.collect.Iterators;
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
|
|
||||||
|
|
||||||
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
|
||||||
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
|
||||||
|
|
||||||
|
|
||||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
|
@ -24,7 +21,6 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
|
||||||
private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
|
private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
|
||||||
|
|
||||||
|
|
||||||
private OaiIteratorFactory oaiIteratorFactory;
|
private OaiIteratorFactory oaiIteratorFactory;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -37,26 +33,44 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
final List<String> sets = new ArrayList<>();
|
final List<String> sets = new ArrayList<>();
|
||||||
if (setParam != null) {
|
if (setParam != null) {
|
||||||
sets.addAll(Lists.newArrayList(Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
|
sets.addAll(
|
||||||
|
Lists.newArrayList(
|
||||||
|
Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
|
||||||
}
|
}
|
||||||
if (sets.isEmpty()) {
|
if (sets.isEmpty()) {
|
||||||
// If no set is defined, ALL the sets must be harvested
|
// If no set is defined, ALL the sets must be harvested
|
||||||
sets.add("");
|
sets.add("");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (baseUrl == null || baseUrl.isEmpty()) { throw new DnetCollectorException("Param 'baseurl' is null or empty"); }
|
if (baseUrl == null || baseUrl.isEmpty()) {
|
||||||
|
throw new DnetCollectorException("Param 'baseurl' is null or empty");
|
||||||
|
}
|
||||||
|
|
||||||
if (mdFormat == null || mdFormat.isEmpty()) { throw new DnetCollectorException("Param 'mdFormat' is null or empty"); }
|
if (mdFormat == null || mdFormat.isEmpty()) {
|
||||||
|
throw new DnetCollectorException("Param 'mdFormat' is null or empty");
|
||||||
|
}
|
||||||
|
|
||||||
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate); }
|
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||||
|
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
||||||
|
}
|
||||||
|
|
||||||
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate); }
|
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
||||||
|
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
||||||
|
}
|
||||||
|
|
||||||
final Iterator<Iterator<String>> iters = sets.stream()
|
final Iterator<Iterator<String>> iters =
|
||||||
.map(set -> getOaiIteratorFactory().newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
|
sets.stream()
|
||||||
|
.map(
|
||||||
|
set ->
|
||||||
|
getOaiIteratorFactory()
|
||||||
|
.newIterator(
|
||||||
|
baseUrl, mdFormat, set, fromDate,
|
||||||
|
untilDate))
|
||||||
.iterator();
|
.iterator();
|
||||||
|
|
||||||
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED), false);
|
return StreamSupport.stream(
|
||||||
|
Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED),
|
||||||
|
false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public OaiIteratorFactory getOaiIteratorFactory() {
|
public OaiIteratorFactory getOaiIteratorFactory() {
|
||||||
|
|
|
@ -1,15 +1,14 @@
|
||||||
package eu.dnetlib.dhp.collection.plugin.oai;
|
package eu.dnetlib.dhp.collection.plugin.oai;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
||||||
|
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
|
||||||
|
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
import java.util.concurrent.PriorityBlockingQueue;
|
import java.util.concurrent.PriorityBlockingQueue;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
|
|
||||||
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -18,10 +17,10 @@ import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
|
|
||||||
public class OaiIterator implements Iterator<String> {
|
public class OaiIterator implements Iterator<String> {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
|
private static final Log log =
|
||||||
|
LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
|
||||||
|
|
||||||
private final Queue<String> queue = new PriorityBlockingQueue<>();
|
private final Queue<String> queue = new PriorityBlockingQueue<>();
|
||||||
private final SAXReader reader = new SAXReader();
|
private final SAXReader reader = new SAXReader();
|
||||||
|
@ -35,7 +34,12 @@ public class OaiIterator implements Iterator<String> {
|
||||||
private boolean started;
|
private boolean started;
|
||||||
private final HttpConnector httpConnector;
|
private final HttpConnector httpConnector;
|
||||||
|
|
||||||
public OaiIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate,
|
public OaiIterator(
|
||||||
|
final String baseUrl,
|
||||||
|
final String mdFormat,
|
||||||
|
final String set,
|
||||||
|
final String fromDate,
|
||||||
|
final String untilDate,
|
||||||
final HttpConnector httpConnector) {
|
final HttpConnector httpConnector) {
|
||||||
this.baseUrl = baseUrl;
|
this.baseUrl = baseUrl;
|
||||||
this.mdFormat = mdFormat;
|
this.mdFormat = mdFormat;
|
||||||
|
@ -86,7 +90,10 @@ public class OaiIterator implements Iterator<String> {
|
||||||
|
|
||||||
private String firstPage() throws DnetCollectorException {
|
private String firstPage() throws DnetCollectorException {
|
||||||
try {
|
try {
|
||||||
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8");
|
String url =
|
||||||
|
baseUrl
|
||||||
|
+ "?verb=ListRecords&metadataPrefix="
|
||||||
|
+ URLEncoder.encode(mdFormat, "UTF-8");
|
||||||
if (set != null && !set.isEmpty()) {
|
if (set != null && !set.isEmpty()) {
|
||||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
||||||
}
|
}
|
||||||
|
@ -107,17 +114,23 @@ public class OaiIterator implements Iterator<String> {
|
||||||
private String extractResumptionToken(final String xml) {
|
private String extractResumptionToken(final String xml) {
|
||||||
|
|
||||||
final String s = StringUtils.substringAfter(xml, "<resumptionToken");
|
final String s = StringUtils.substringAfter(xml, "<resumptionToken");
|
||||||
if (s == null) { return null; }
|
if (s == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
final String result = StringUtils.substringBetween(s, ">", "</");
|
final String result = StringUtils.substringBetween(s, ">", "</");
|
||||||
if (result == null) { return null; }
|
if (result == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
return result.trim();
|
return result.trim();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private String otherPages(final String resumptionToken) throws DnetCollectorException {
|
private String otherPages(final String resumptionToken) throws DnetCollectorException {
|
||||||
try {
|
try {
|
||||||
return downloadPage(baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken, "UTF-8"));
|
return downloadPage(
|
||||||
|
baseUrl
|
||||||
|
+ "?verb=ListRecords&resumptionToken="
|
||||||
|
+ URLEncoder.encode(resumptionToken, "UTF-8"));
|
||||||
} catch (final UnsupportedEncodingException e) {
|
} catch (final UnsupportedEncodingException e) {
|
||||||
throw new DnetCollectorException(e);
|
throw new DnetCollectorException(e);
|
||||||
}
|
}
|
||||||
|
@ -136,12 +149,16 @@ public class OaiIterator implements Iterator<String> {
|
||||||
doc = reader.read(new StringReader(cleaned));
|
doc = reader.read(new StringReader(cleaned));
|
||||||
} catch (final DocumentException e1) {
|
} catch (final DocumentException e1) {
|
||||||
final String resumptionToken = extractResumptionToken(xml);
|
final String resumptionToken = extractResumptionToken(xml);
|
||||||
if (resumptionToken == null) { throw new DnetCollectorException("Error parsing cleaned document:" + cleaned, e1); }
|
if (resumptionToken == null) {
|
||||||
|
throw new DnetCollectorException(
|
||||||
|
"Error parsing cleaned document:" + cleaned, e1);
|
||||||
|
}
|
||||||
return resumptionToken;
|
return resumptionToken;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
|
final Node errorNode =
|
||||||
|
doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
|
||||||
if (errorNode != null) {
|
if (errorNode != null) {
|
||||||
final String code = errorNode.valueOf("@code");
|
final String code = errorNode.valueOf("@code");
|
||||||
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
|
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
|
||||||
|
@ -152,12 +169,11 @@ public class OaiIterator implements Iterator<String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
|
for (final Object o :
|
||||||
|
doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
|
||||||
queue.add(((Node) o).asXML());
|
queue.add(((Node) o).asXML());
|
||||||
}
|
}
|
||||||
|
|
||||||
return doc.valueOf("//*[local-name()='resumptionToken']");
|
return doc.valueOf("//*[local-name()='resumptionToken']");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +1,23 @@
|
||||||
package eu.dnetlib.dhp.collection.plugin.oai;
|
package eu.dnetlib.dhp.collection.plugin.oai;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
|
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
|
||||||
public class OaiIteratorFactory {
|
public class OaiIteratorFactory {
|
||||||
|
|
||||||
|
|
||||||
private HttpConnector httpConnector;
|
private HttpConnector httpConnector;
|
||||||
|
|
||||||
public Iterator<String> newIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate) {
|
public Iterator<String> newIterator(
|
||||||
|
final String baseUrl,
|
||||||
|
final String mdFormat,
|
||||||
|
final String set,
|
||||||
|
final String fromDate,
|
||||||
|
final String untilDate) {
|
||||||
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
|
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
|
||||||
}
|
}
|
||||||
|
|
||||||
private HttpConnector getHttpConnector() {
|
private HttpConnector getHttpConnector() {
|
||||||
if (httpConnector== null)
|
if (httpConnector == null) httpConnector = new HttpConnector();
|
||||||
httpConnector = new HttpConnector();
|
|
||||||
return httpConnector;
|
return httpConnector;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,16 +2,18 @@ package eu.dnetlib.dhp.collection.worker;
|
||||||
|
|
||||||
public class DnetCollectorException extends Exception {
|
public class DnetCollectorException extends Exception {
|
||||||
|
|
||||||
/**
|
/** */
|
||||||
*
|
|
||||||
*/
|
|
||||||
private static final long serialVersionUID = -290723075076039757L;
|
private static final long serialVersionUID = -290723075076039757L;
|
||||||
|
|
||||||
public DnetCollectorException() {
|
public DnetCollectorException() {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
public DnetCollectorException(final String message, final Throwable cause, final boolean enableSuppression, final boolean writableStackTrace) {
|
public DnetCollectorException(
|
||||||
|
final String message,
|
||||||
|
final Throwable cause,
|
||||||
|
final boolean enableSuppression,
|
||||||
|
final boolean writableStackTrace) {
|
||||||
super(message, cause, enableSuppression, writableStackTrace);
|
super(message, cause, enableSuppression, writableStackTrace);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,5 +28,4 @@ public class DnetCollectorException extends Exception {
|
||||||
public DnetCollectorException(final Throwable cause) {
|
public DnetCollectorException(final Throwable cause) {
|
||||||
super(cause);
|
super(cause);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,13 +2,17 @@ package eu.dnetlib.dhp.collection.worker;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
import eu.dnetlib.collector.worker.model.ApiDescriptor;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
||||||
import eu.dnetlib.message.Message;
|
import eu.dnetlib.message.Message;
|
||||||
import eu.dnetlib.message.MessageManager;
|
import eu.dnetlib.message.MessageManager;
|
||||||
import eu.dnetlib.message.MessageType;
|
import eu.dnetlib.message.MessageType;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -18,37 +22,34 @@ import org.apache.hadoop.io.Text;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.URI;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
|
||||||
|
|
||||||
public class DnetCollectorWorker {
|
public class DnetCollectorWorker {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
|
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
|
||||||
|
|
||||||
|
|
||||||
private final CollectorPluginFactory collectorPluginFactory;
|
private final CollectorPluginFactory collectorPluginFactory;
|
||||||
|
|
||||||
private final ArgumentApplicationParser argumentParser;
|
private final ArgumentApplicationParser argumentParser;
|
||||||
|
|
||||||
private final MessageManager manager;
|
private final MessageManager manager;
|
||||||
|
|
||||||
|
public DnetCollectorWorker(
|
||||||
public DnetCollectorWorker(final CollectorPluginFactory collectorPluginFactory, final ArgumentApplicationParser argumentParser, final MessageManager manager) throws DnetCollectorException {
|
final CollectorPluginFactory collectorPluginFactory,
|
||||||
|
final ArgumentApplicationParser argumentParser,
|
||||||
|
final MessageManager manager)
|
||||||
|
throws DnetCollectorException {
|
||||||
this.collectorPluginFactory = collectorPluginFactory;
|
this.collectorPluginFactory = collectorPluginFactory;
|
||||||
this.argumentParser = argumentParser;
|
this.argumentParser = argumentParser;
|
||||||
this.manager = manager;
|
this.manager = manager;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void collect() throws DnetCollectorException {
|
public void collect() throws DnetCollectorException {
|
||||||
try {
|
try {
|
||||||
final ObjectMapper jsonMapper = new ObjectMapper();
|
final ObjectMapper jsonMapper = new ObjectMapper();
|
||||||
final ApiDescriptor api = jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
|
final ApiDescriptor api =
|
||||||
|
jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
|
||||||
|
|
||||||
final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol());
|
final CollectorPlugin plugin =
|
||||||
|
collectorPluginFactory.getPluginByProtocol(api.getProtocol());
|
||||||
|
|
||||||
final String hdfsuri = argumentParser.get("namenode");
|
final String hdfsuri = argumentParser.get("namenode");
|
||||||
|
|
||||||
|
@ -71,19 +72,35 @@ public class DnetCollectorWorker {
|
||||||
final Map<String, String> ongoingMap = new HashMap<>();
|
final Map<String, String> ongoingMap = new HashMap<>();
|
||||||
final Map<String, String> reportMap = new HashMap<>();
|
final Map<String, String> reportMap = new HashMap<>();
|
||||||
final AtomicInteger counter = new AtomicInteger(0);
|
final AtomicInteger counter = new AtomicInteger(0);
|
||||||
try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
|
try (SequenceFile.Writer writer =
|
||||||
SequenceFile.Writer.file(hdfswritepath), SequenceFile.Writer.keyClass(IntWritable.class),
|
SequenceFile.createWriter(
|
||||||
|
conf,
|
||||||
|
SequenceFile.Writer.file(hdfswritepath),
|
||||||
|
SequenceFile.Writer.keyClass(IntWritable.class),
|
||||||
SequenceFile.Writer.valueClass(Text.class))) {
|
SequenceFile.Writer.valueClass(Text.class))) {
|
||||||
final IntWritable key = new IntWritable(counter.get());
|
final IntWritable key = new IntWritable(counter.get());
|
||||||
final Text value = new Text();
|
final Text value = new Text();
|
||||||
plugin.collect(api).forEach(content -> {
|
plugin.collect(api)
|
||||||
|
.forEach(
|
||||||
|
content -> {
|
||||||
key.set(counter.getAndIncrement());
|
key.set(counter.getAndIncrement());
|
||||||
value.set(content);
|
value.set(content);
|
||||||
if (counter.get() % 10 == 0) {
|
if (counter.get() % 10 == 0) {
|
||||||
try {
|
try {
|
||||||
ongoingMap.put("ongoing", "" + counter.get());
|
ongoingMap.put("ongoing", "" + counter.get());
|
||||||
log.debug("Sending message: "+ manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.get("rabbitOngoingQueue"), true, false));
|
log.debug(
|
||||||
|
"Sending message: "
|
||||||
|
+ manager.sendMessage(
|
||||||
|
new Message(
|
||||||
|
argumentParser.get(
|
||||||
|
"workflowId"),
|
||||||
|
"Collection",
|
||||||
|
MessageType.ONGOING,
|
||||||
|
ongoingMap),
|
||||||
|
argumentParser.get(
|
||||||
|
"rabbitOngoingQueue"),
|
||||||
|
true,
|
||||||
|
false));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error("Error on sending message ", e);
|
log.error("Error on sending message ", e);
|
||||||
}
|
}
|
||||||
|
@ -93,21 +110,31 @@ public class DnetCollectorWorker {
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
ongoingMap.put("ongoing", "" + counter.get());
|
ongoingMap.put("ongoing", "" + counter.get());
|
||||||
manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.get("rabbitOngoingQueue"), true, false);
|
manager.sendMessage(
|
||||||
|
new Message(
|
||||||
|
argumentParser.get("workflowId"),
|
||||||
|
"Collection",
|
||||||
|
MessageType.ONGOING,
|
||||||
|
ongoingMap),
|
||||||
|
argumentParser.get("rabbitOngoingQueue"),
|
||||||
|
true,
|
||||||
|
false);
|
||||||
reportMap.put("collected", "" + counter.get());
|
reportMap.put("collected", "" + counter.get());
|
||||||
manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), argumentParser.get("rabbitOngoingQueue"), true, false);
|
manager.sendMessage(
|
||||||
|
new Message(
|
||||||
|
argumentParser.get("workflowId"),
|
||||||
|
"Collection",
|
||||||
|
MessageType.REPORT,
|
||||||
|
reportMap),
|
||||||
|
argumentParser.get("rabbitOngoingQueue"),
|
||||||
|
true,
|
||||||
|
false);
|
||||||
manager.close();
|
manager.close();
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
throw new DnetCollectorException("Error on collecting ", e);
|
throw new DnetCollectorException("Error on collecting ", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
package eu.dnetlib.dhp.collection.worker;
|
package eu.dnetlib.dhp.collection.worker;
|
||||||
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
|
||||||
import eu.dnetlib.message.MessageManager;
|
import eu.dnetlib.message.MessageManager;
|
||||||
|
@ -8,16 +7,13 @@ import org.apache.commons.io.IOUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* DnetCollectortWorkerApplication is the main class responsible to start
|
* DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into
|
||||||
* the Dnet Collection into HDFS.
|
* HDFS. This module will be executed on the hadoop cluster and taking in input some parameters that
|
||||||
* This module will be executed on the hadoop cluster and taking in input some parameters
|
* tells it which is the right collector plugin to use and where store the data into HDFS path
|
||||||
* that tells it which is the right collector plugin to use and where store the data into HDFS path
|
|
||||||
*
|
*
|
||||||
* @author Sandro La Bruzzo
|
* @author Sandro La Bruzzo
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class DnetCollectorWorkerApplication {
|
public class DnetCollectorWorkerApplication {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
|
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
|
||||||
|
@ -26,22 +22,27 @@ public class DnetCollectorWorkerApplication {
|
||||||
|
|
||||||
private static ArgumentApplicationParser argumentParser;
|
private static ArgumentApplicationParser argumentParser;
|
||||||
|
|
||||||
|
/** @param args */
|
||||||
/**
|
|
||||||
* @param args
|
|
||||||
*/
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
|
|
||||||
argumentParser= new ArgumentApplicationParser(IOUtils.toString(DnetCollectorWorker.class.getResourceAsStream("/eu/dnetlib/collector/worker/collector_parameter.json")));
|
argumentParser =
|
||||||
|
new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(
|
||||||
|
DnetCollectorWorker.class.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/collector/worker/collector_parameter.json")));
|
||||||
argumentParser.parseArgument(args);
|
argumentParser.parseArgument(args);
|
||||||
log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
|
log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
|
||||||
log.info("json = " + argumentParser.get("apidescriptor"));
|
log.info("json = " + argumentParser.get("apidescriptor"));
|
||||||
final MessageManager manager = new MessageManager(argumentParser.get("rabbitHost"), argumentParser.get("rabbitUser"), argumentParser.get("rabbitPassword"), false, false, null);
|
final MessageManager manager =
|
||||||
final DnetCollectorWorker worker = new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
|
new MessageManager(
|
||||||
|
argumentParser.get("rabbitHost"),
|
||||||
|
argumentParser.get("rabbitUser"),
|
||||||
|
argumentParser.get("rabbitPassword"),
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
null);
|
||||||
|
final DnetCollectorWorker worker =
|
||||||
|
new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
|
||||||
worker.collect();
|
worker.collect();
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,5 +15,4 @@ public class CollectorPluginErrorLogList extends LinkedList<String> {
|
||||||
}
|
}
|
||||||
return log;
|
return log;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,10 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
|
||||||
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
||||||
|
|
||||||
;
|
|
||||||
|
|
||||||
|
|
||||||
public class CollectorPluginFactory {
|
public class CollectorPluginFactory {
|
||||||
|
|
||||||
public CollectorPlugin getPluginByProtocol(final String protocol) throws DnetCollectorException {
|
public CollectorPlugin getPluginByProtocol(final String protocol)
|
||||||
|
throws DnetCollectorException {
|
||||||
if (protocol == null) throw new DnetCollectorException("protocol cannot be null");
|
if (protocol == null) throw new DnetCollectorException("protocol cannot be null");
|
||||||
switch (protocol.toLowerCase().trim()) {
|
switch (protocol.toLowerCase().trim()) {
|
||||||
case "oai":
|
case "oai":
|
||||||
|
@ -17,6 +15,5 @@ public class CollectorPluginFactory {
|
||||||
default:
|
default:
|
||||||
throw new DnetCollectorException("UNknown protocol");
|
throw new DnetCollectorException("UNknown protocol");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,15 +1,6 @@
|
||||||
package eu.dnetlib.dhp.collection.worker.utils;
|
package eu.dnetlib.dhp.collection.worker.utils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang.math.NumberUtils;
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
|
|
||||||
import javax.net.ssl.HttpsURLConnection;
|
|
||||||
import javax.net.ssl.SSLContext;
|
|
||||||
import javax.net.ssl.TrustManager;
|
|
||||||
import javax.net.ssl.X509TrustManager;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.net.*;
|
import java.net.*;
|
||||||
|
@ -17,7 +8,14 @@ import java.security.GeneralSecurityException;
|
||||||
import java.security.cert.X509Certificate;
|
import java.security.cert.X509Certificate;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import javax.net.ssl.HttpsURLConnection;
|
||||||
|
import javax.net.ssl.SSLContext;
|
||||||
|
import javax.net.ssl.TrustManager;
|
||||||
|
import javax.net.ssl.X509TrustManager;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang.math.NumberUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
public class HttpConnector {
|
public class HttpConnector {
|
||||||
|
|
||||||
|
@ -38,11 +36,9 @@ public class HttpConnector {
|
||||||
/**
|
/**
|
||||||
* Given the URL returns the content via HTTP GET
|
* Given the URL returns the content via HTTP GET
|
||||||
*
|
*
|
||||||
* @param requestUrl
|
* @param requestUrl the URL
|
||||||
* the URL
|
|
||||||
* @return the content of the downloaded resource
|
* @return the content of the downloaded resource
|
||||||
* @throws DnetCollectorException
|
* @throws DnetCollectorException when retrying more than maxNumberOfRetry times
|
||||||
* when retrying more than maxNumberOfRetry times
|
|
||||||
*/
|
*/
|
||||||
public String getInputSource(final String requestUrl) throws DnetCollectorException {
|
public String getInputSource(final String requestUrl) throws DnetCollectorException {
|
||||||
return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
|
return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
|
||||||
|
@ -51,17 +47,19 @@ public class HttpConnector {
|
||||||
/**
|
/**
|
||||||
* Given the URL returns the content as a stream via HTTP GET
|
* Given the URL returns the content as a stream via HTTP GET
|
||||||
*
|
*
|
||||||
* @param requestUrl
|
* @param requestUrl the URL
|
||||||
* the URL
|
|
||||||
* @return the content of the downloaded resource as InputStream
|
* @return the content of the downloaded resource as InputStream
|
||||||
* @throws DnetCollectorException
|
* @throws DnetCollectorException when retrying more than maxNumberOfRetry times
|
||||||
* when retrying more than maxNumberOfRetry times
|
|
||||||
*/
|
*/
|
||||||
public InputStream getInputSourceAsStream(final String requestUrl) throws DnetCollectorException {
|
public InputStream getInputSourceAsStream(final String requestUrl)
|
||||||
|
throws DnetCollectorException {
|
||||||
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
||||||
}
|
}
|
||||||
|
|
||||||
private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
|
private String attemptDownlaodAsString(
|
||||||
|
final String requestUrl,
|
||||||
|
final int retryNumber,
|
||||||
|
final CollectorPluginErrorLogList errorList)
|
||||||
throws DnetCollectorException {
|
throws DnetCollectorException {
|
||||||
try {
|
try {
|
||||||
final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
|
||||||
|
@ -80,17 +78,24 @@ public class HttpConnector {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private InputStream attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
|
private InputStream attemptDownload(
|
||||||
|
final String requestUrl,
|
||||||
|
final int retryNumber,
|
||||||
|
final CollectorPluginErrorLogList errorList)
|
||||||
throws DnetCollectorException {
|
throws DnetCollectorException {
|
||||||
|
|
||||||
if (retryNumber > maxNumberOfRetry) { throw new DnetCollectorException("Max number of retries exceeded. Cause: \n " + errorList); }
|
if (retryNumber > maxNumberOfRetry) {
|
||||||
|
throw new DnetCollectorException(
|
||||||
|
"Max number of retries exceeded. Cause: \n " + errorList);
|
||||||
|
}
|
||||||
|
|
||||||
log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
|
log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
|
||||||
try {
|
try {
|
||||||
InputStream input = null;
|
InputStream input = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
final HttpURLConnection urlConn =
|
||||||
|
(HttpURLConnection) new URL(requestUrl).openConnection();
|
||||||
urlConn.setInstanceFollowRedirects(false);
|
urlConn.setInstanceFollowRedirects(false);
|
||||||
urlConn.setReadTimeout(readTimeOut * 1000);
|
urlConn.setReadTimeout(readTimeOut * 1000);
|
||||||
urlConn.addRequestProperty("User-Agent", userAgent);
|
urlConn.addRequestProperty("User-Agent", userAgent);
|
||||||
|
@ -100,22 +105,35 @@ public class HttpConnector {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||||
if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
|
if (retryAfter > 0
|
||||||
|
&& urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
|
||||||
log.warn("waiting and repeating request after " + retryAfter + " sec.");
|
log.warn("waiting and repeating request after " + retryAfter + " sec.");
|
||||||
Thread.sleep(retryAfter * 1000);
|
Thread.sleep(retryAfter * 1000);
|
||||||
errorList.add("503 Service Unavailable");
|
errorList.add("503 Service Unavailable");
|
||||||
urlConn.disconnect();
|
urlConn.disconnect();
|
||||||
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
||||||
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM || urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
|
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
|
||||||
|
|| urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
|
||||||
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
||||||
log.debug("The requested url has been moved to " + newUrl);
|
log.debug("The requested url has been moved to " + newUrl);
|
||||||
errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
|
errorList.add(
|
||||||
|
String.format(
|
||||||
|
"%s %s. Moved to: %s",
|
||||||
|
urlConn.getResponseCode(),
|
||||||
|
urlConn.getResponseMessage(),
|
||||||
|
newUrl));
|
||||||
urlConn.disconnect();
|
urlConn.disconnect();
|
||||||
return attemptDownload(newUrl, retryNumber + 1, errorList);
|
return attemptDownload(newUrl, retryNumber + 1, errorList);
|
||||||
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
|
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
|
||||||
log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
log.error(
|
||||||
|
String.format(
|
||||||
|
"HTTP error: %s %s",
|
||||||
|
urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
||||||
Thread.sleep(defaultDelay * 1000);
|
Thread.sleep(defaultDelay * 1000);
|
||||||
errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
errorList.add(
|
||||||
|
String.format(
|
||||||
|
"%s %s",
|
||||||
|
urlConn.getResponseCode(), urlConn.getResponseMessage()));
|
||||||
urlConn.disconnect();
|
urlConn.disconnect();
|
||||||
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
return attemptDownload(requestUrl, retryNumber + 1, errorList);
|
||||||
} else {
|
} else {
|
||||||
|
@ -148,30 +166,44 @@ public class HttpConnector {
|
||||||
|
|
||||||
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
||||||
for (final String key : headerMap.keySet()) {
|
for (final String key : headerMap.keySet()) {
|
||||||
if (key != null && key.toLowerCase().equals("retry-after") && headerMap.get(key).size() > 0
|
if (key != null
|
||||||
&& NumberUtils.isNumber(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; }
|
&& key.toLowerCase().equals("retry-after")
|
||||||
|
&& headerMap.get(key).size() > 0
|
||||||
|
&& NumberUtils.isNumber(headerMap.get(key).get(0))) {
|
||||||
|
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String obtainNewLocation(final Map<String, List<String>> headerMap) throws DnetCollectorException {
|
private String obtainNewLocation(final Map<String, List<String>> headerMap)
|
||||||
|
throws DnetCollectorException {
|
||||||
for (final String key : headerMap.keySet()) {
|
for (final String key : headerMap.keySet()) {
|
||||||
if (key != null && key.toLowerCase().equals("location") && headerMap.get(key).size() > 0) { return headerMap.get(key).get(0); }
|
if (key != null
|
||||||
|
&& key.toLowerCase().equals("location")
|
||||||
|
&& headerMap.get(key).size() > 0) {
|
||||||
|
return headerMap.get(key).get(0);
|
||||||
}
|
}
|
||||||
throw new DnetCollectorException("The requested url has been MOVED, but 'location' param is MISSING");
|
}
|
||||||
|
throw new DnetCollectorException(
|
||||||
|
"The requested url has been MOVED, but 'location' param is MISSING");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* register for https scheme; this is a workaround and not intended for the use in trusted environments
|
* register for https scheme; this is a workaround and not intended for the use in trusted
|
||||||
|
* environments
|
||||||
*/
|
*/
|
||||||
public void initTrustManager() {
|
public void initTrustManager() {
|
||||||
final X509TrustManager tm = new X509TrustManager() {
|
final X509TrustManager tm =
|
||||||
|
new X509TrustManager() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkClientTrusted(final X509Certificate[] xcs, final String string) {}
|
public void checkClientTrusted(
|
||||||
|
final X509Certificate[] xcs, final String string) {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void checkServerTrusted(final X509Certificate[] xcs, final String string) {}
|
public void checkServerTrusted(
|
||||||
|
final X509Certificate[] xcs, final String string) {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public X509Certificate[] getAcceptedIssuers() {
|
public X509Certificate[] getAcceptedIssuers() {
|
||||||
|
@ -215,5 +247,4 @@ public class HttpConnector {
|
||||||
public String getResponseType() {
|
public String getResponseType() {
|
||||||
return responseType;
|
return responseType;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,26 +6,24 @@ import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/** @author jochen, Andreas Czerniak */
|
||||||
* @author jochen, Andreas Czerniak
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class XmlCleaner {
|
public class XmlCleaner {
|
||||||
|
|
||||||
/**
|
/** Pattern for numeric entities. */
|
||||||
* Pattern for numeric entities.
|
private static Pattern validCharacterEntityPattern =
|
||||||
*/
|
Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
|
||||||
private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$
|
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
|
||||||
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};"); //$NON-NLS-1$
|
// //$NON-NLS-1$
|
||||||
|
|
||||||
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to 
|
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to 
|
||||||
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
|
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
|
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD
|
||||||
* [#x10000-#x10FFFF]
|
* | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
|
||||||
*/
|
*/
|
||||||
private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$
|
private static Pattern invalidCharacterPattern =
|
||||||
|
Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
|
||||||
|
|
||||||
// Map entities to their unicode equivalent
|
// Map entities to their unicode equivalent
|
||||||
private static Set<String> goodEntities = new HashSet<>();
|
private static Set<String> goodEntities = new HashSet<>();
|
||||||
|
@ -74,11 +72,14 @@ public class XmlCleaner {
|
||||||
badEntities.put("Ÿ", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
|
badEntities.put("Ÿ", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
|
||||||
// misc entities
|
// misc entities
|
||||||
badEntities.put("€", "\u20AC"); // $NON-NLS-1$ //$NON-NLS-2$ // euro
|
badEntities.put("€", "\u20AC"); // $NON-NLS-1$ //$NON-NLS-2$ // euro
|
||||||
badEntities.put("‘", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
|
badEntities.put(
|
||||||
badEntities.put("’", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
|
"‘", "\u2018"); // $NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
|
||||||
|
badEntities.put(
|
||||||
|
"’", "\u2019"); // $NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
|
||||||
// Latin 1 entities
|
// Latin 1 entities
|
||||||
badEntities.put(" ", "\u00A0"); // $NON-NLS-1$ //$NON-NLS-2$ // no-break space
|
badEntities.put(" ", "\u00A0"); // $NON-NLS-1$ //$NON-NLS-2$ // no-break space
|
||||||
badEntities.put("¡", "\u00A1"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
|
badEntities.put(
|
||||||
|
"¡", "\u00A1"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
|
||||||
badEntities.put("¢", "\u00A2"); // $NON-NLS-1$ //$NON-NLS-2$ // cent sign
|
badEntities.put("¢", "\u00A2"); // $NON-NLS-1$ //$NON-NLS-2$ // cent sign
|
||||||
badEntities.put("£", "\u00A3"); // $NON-NLS-1$ //$NON-NLS-2$ // pound sign
|
badEntities.put("£", "\u00A3"); // $NON-NLS-1$ //$NON-NLS-2$ // pound sign
|
||||||
badEntities.put("¤", "\u00A4"); // $NON-NLS-1$ //$NON-NLS-2$ // currency sign
|
badEntities.put("¤", "\u00A4"); // $NON-NLS-1$ //$NON-NLS-2$ // currency sign
|
||||||
|
@ -87,8 +88,11 @@ public class XmlCleaner {
|
||||||
badEntities.put("§", "\u00A7"); // $NON-NLS-1$ //$NON-NLS-2$ // section sign
|
badEntities.put("§", "\u00A7"); // $NON-NLS-1$ //$NON-NLS-2$ // section sign
|
||||||
badEntities.put("¨", "\u00A8"); // $NON-NLS-1$ //$NON-NLS-2$ // diaeresis
|
badEntities.put("¨", "\u00A8"); // $NON-NLS-1$ //$NON-NLS-2$ // diaeresis
|
||||||
badEntities.put("©", "\u00A9"); // $NON-NLS-1$ //$NON-NLS-2$ // copyright sign
|
badEntities.put("©", "\u00A9"); // $NON-NLS-1$ //$NON-NLS-2$ // copyright sign
|
||||||
badEntities.put("ª", "\u00AA"); //$NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
|
badEntities.put(
|
||||||
badEntities.put("«", "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
|
"ª", "\u00AA"); // $NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
|
||||||
|
badEntities.put(
|
||||||
|
"«",
|
||||||
|
"\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
|
||||||
badEntities.put("¬", "\u00AC"); // $NON-NLS-1$ //$NON-NLS-2$ // not sign
|
badEntities.put("¬", "\u00AC"); // $NON-NLS-1$ //$NON-NLS-2$ // not sign
|
||||||
badEntities.put("­", "\u00AD"); // $NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
|
badEntities.put("­", "\u00AD"); // $NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
|
||||||
badEntities.put("®", "\u00AE"); // $NON-NLS-1$ //$NON-NLS-2$ // registered sign
|
badEntities.put("®", "\u00AE"); // $NON-NLS-1$ //$NON-NLS-2$ // registered sign
|
||||||
|
@ -103,89 +107,213 @@ public class XmlCleaner {
|
||||||
badEntities.put("·", "\u00B7"); // $NON-NLS-1$ //$NON-NLS-2$ // middle dot
|
badEntities.put("·", "\u00B7"); // $NON-NLS-1$ //$NON-NLS-2$ // middle dot
|
||||||
badEntities.put("¸", "\u00B8"); // $NON-NLS-1$ //$NON-NLS-2$ // cedilla
|
badEntities.put("¸", "\u00B8"); // $NON-NLS-1$ //$NON-NLS-2$ // cedilla
|
||||||
badEntities.put("¹", "\u00B9"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript one
|
badEntities.put("¹", "\u00B9"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript one
|
||||||
badEntities.put("º", "\u00BA"); //$NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
|
badEntities.put(
|
||||||
badEntities.put("»", "\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
|
"º", "\u00BA"); // $NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
|
||||||
badEntities.put("¼", "\u00BC"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
|
badEntities.put(
|
||||||
badEntities.put("½", "\u00BD"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
|
"»",
|
||||||
badEntities.put("¾", "\u00BE"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
|
"\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
|
||||||
badEntities.put("¿", "\u00BF"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
|
badEntities.put(
|
||||||
badEntities.put("À", "\u00C0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
|
"¼", "\u00BC"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
|
||||||
badEntities.put("Á", "\u00C1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
|
badEntities.put(
|
||||||
badEntities.put("Â", "\u00C2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
|
"½", "\u00BD"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
|
||||||
badEntities.put("Ã", "\u00C3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
|
badEntities.put(
|
||||||
badEntities.put("Ä", "\u00C4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
|
"¾",
|
||||||
badEntities.put("Å", "\u00C5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
|
"\u00BE"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
|
||||||
badEntities.put("Æ", "\u00C6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
|
badEntities.put(
|
||||||
badEntities.put("Ç", "\u00C7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
|
"¿", "\u00BF"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
|
||||||
badEntities.put("È", "\u00C8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
|
badEntities.put(
|
||||||
badEntities.put("É", "\u00C9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
|
"À",
|
||||||
badEntities.put("Ê", "\u00CA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
|
"\u00C0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
|
||||||
badEntities.put("Ë", "\u00CB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
|
badEntities.put(
|
||||||
badEntities.put("Ì", "\u00CC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
|
"Á",
|
||||||
badEntities.put("Í", "\u00CD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
|
"\u00C1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
|
||||||
badEntities.put("Î", "\u00CE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
|
badEntities.put(
|
||||||
badEntities.put("Ï", "\u00CF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
|
"Â",
|
||||||
|
"\u00C2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"Ã",
|
||||||
|
"\u00C3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
|
||||||
|
badEntities.put(
|
||||||
|
"Ä",
|
||||||
|
"\u00C4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
|
||||||
|
badEntities.put(
|
||||||
|
"Å",
|
||||||
|
"\u00C5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
|
||||||
|
badEntities.put(
|
||||||
|
"Æ", "\u00C6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
|
||||||
|
badEntities.put(
|
||||||
|
"Ç",
|
||||||
|
"\u00C7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
|
||||||
|
badEntities.put(
|
||||||
|
"È",
|
||||||
|
"\u00C8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
|
||||||
|
badEntities.put(
|
||||||
|
"É",
|
||||||
|
"\u00C9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
|
||||||
|
badEntities.put(
|
||||||
|
"Ê",
|
||||||
|
"\u00CA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"Ë",
|
||||||
|
"\u00CB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
|
||||||
|
badEntities.put(
|
||||||
|
"Ì",
|
||||||
|
"\u00CC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
|
||||||
|
badEntities.put(
|
||||||
|
"Í",
|
||||||
|
"\u00CD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
|
||||||
|
badEntities.put(
|
||||||
|
"Î",
|
||||||
|
"\u00CE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"Ï",
|
||||||
|
"\u00CF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
|
||||||
badEntities.put("Ð", "\u00D0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
|
badEntities.put("Ð", "\u00D0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
|
||||||
badEntities.put("Ñ", "\u00D1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
|
badEntities.put(
|
||||||
badEntities.put("Ò", "\u00D2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
|
"Ñ",
|
||||||
badEntities.put("Ó", "\u00D3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
|
"\u00D1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
|
||||||
badEntities.put("Ô", "\u00D4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
|
badEntities.put(
|
||||||
badEntities.put("Õ", "\u00D5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
|
"Ò",
|
||||||
badEntities.put("Ö", "\u00D6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
|
"\u00D2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
|
||||||
|
badEntities.put(
|
||||||
|
"Ó",
|
||||||
|
"\u00D3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
|
||||||
|
badEntities.put(
|
||||||
|
"Ô",
|
||||||
|
"\u00D4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"Õ",
|
||||||
|
"\u00D5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
|
||||||
|
badEntities.put(
|
||||||
|
"Ö",
|
||||||
|
"\u00D6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
|
||||||
badEntities.put("×", "\u00D7"); // $NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
|
badEntities.put("×", "\u00D7"); // $NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
|
||||||
badEntities.put("Ø", "\u00D8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
|
badEntities.put(
|
||||||
badEntities.put("Ù", "\u00D9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
|
"Ø",
|
||||||
badEntities.put("Ú", "\u00DA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
|
"\u00D8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
|
||||||
badEntities.put("Û", "\u00DB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
|
badEntities.put(
|
||||||
badEntities.put("Ü", "\u00DC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
|
"Ù",
|
||||||
badEntities.put("Ý", "\u00DD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
|
"\u00D9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
|
||||||
badEntities.put("Þ", "\u00DE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
|
badEntities.put(
|
||||||
badEntities.put("ß", "\u00DF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
|
"Ú",
|
||||||
badEntities.put("à", "\u00E0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
|
"\u00DA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
|
||||||
badEntities.put("á", "\u00E1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
|
badEntities.put(
|
||||||
badEntities.put("â", "\u00E2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
|
"Û",
|
||||||
badEntities.put("ã", "\u00E3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
|
"\u00DB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
|
||||||
badEntities.put("ä", "\u00E4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
|
badEntities.put(
|
||||||
badEntities.put("å", "\u00E5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
|
"Ü",
|
||||||
|
"\u00DC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
|
||||||
|
badEntities.put(
|
||||||
|
"Ý",
|
||||||
|
"\u00DD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
|
||||||
|
badEntities.put(
|
||||||
|
"Þ", "\u00DE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
|
||||||
|
badEntities.put(
|
||||||
|
"ß", "\u00DF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
|
||||||
|
badEntities.put(
|
||||||
|
"à",
|
||||||
|
"\u00E0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
|
||||||
|
badEntities.put(
|
||||||
|
"á",
|
||||||
|
"\u00E1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
|
||||||
|
badEntities.put(
|
||||||
|
"â",
|
||||||
|
"\u00E2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"ã",
|
||||||
|
"\u00E3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
|
||||||
|
badEntities.put(
|
||||||
|
"ä",
|
||||||
|
"\u00E4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
|
||||||
|
badEntities.put(
|
||||||
|
"å",
|
||||||
|
"\u00E5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
|
||||||
badEntities.put("æ", "\u00E6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
|
badEntities.put("æ", "\u00E6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
|
||||||
badEntities.put("ç", "\u00E7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
|
badEntities.put(
|
||||||
badEntities.put("è", "\u00E8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
|
"ç",
|
||||||
badEntities.put("é", "\u00E9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
|
"\u00E7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
|
||||||
badEntities.put("ê", "\u00EA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
|
badEntities.put(
|
||||||
badEntities.put("ë", "\u00EB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
|
"è",
|
||||||
badEntities.put("ì", "\u00EC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
|
"\u00E8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
|
||||||
badEntities.put("í", "\u00ED"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
|
badEntities.put(
|
||||||
badEntities.put("î", "\u00EE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
|
"é",
|
||||||
badEntities.put("ï", "\u00EF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
|
"\u00E9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
|
||||||
|
badEntities.put(
|
||||||
|
"ê",
|
||||||
|
"\u00EA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"ë",
|
||||||
|
"\u00EB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
|
||||||
|
badEntities.put(
|
||||||
|
"ì",
|
||||||
|
"\u00EC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
|
||||||
|
badEntities.put(
|
||||||
|
"í",
|
||||||
|
"\u00ED"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
|
||||||
|
badEntities.put(
|
||||||
|
"î",
|
||||||
|
"\u00EE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"ï",
|
||||||
|
"\u00EF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
|
||||||
badEntities.put("ð", "\u00F0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
|
badEntities.put("ð", "\u00F0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
|
||||||
badEntities.put("ñ", "\u00F1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
|
badEntities.put(
|
||||||
badEntities.put("ò", "\u00F2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
|
"ñ",
|
||||||
badEntities.put("ó", "\u00F3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
|
"\u00F1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
|
||||||
badEntities.put("ô", "\u00F4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
|
badEntities.put(
|
||||||
badEntities.put("õ", "\u00F5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
|
"ò",
|
||||||
badEntities.put("ö", "\u00F6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
|
"\u00F2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
|
||||||
|
badEntities.put(
|
||||||
|
"ó",
|
||||||
|
"\u00F3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
|
||||||
|
badEntities.put(
|
||||||
|
"ô",
|
||||||
|
"\u00F4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"õ",
|
||||||
|
"\u00F5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
|
||||||
|
badEntities.put(
|
||||||
|
"ö",
|
||||||
|
"\u00F6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
|
||||||
badEntities.put("÷", "\u00F7"); // $NON-NLS-1$ //$NON-NLS-2$ // division sign
|
badEntities.put("÷", "\u00F7"); // $NON-NLS-1$ //$NON-NLS-2$ // division sign
|
||||||
badEntities.put("ø", "\u00F8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
|
badEntities.put(
|
||||||
badEntities.put("ù", "\u00F9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
|
"ø",
|
||||||
badEntities.put("ú", "\u00FA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
|
"\u00F8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
|
||||||
badEntities.put("û", "\u00FB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
|
badEntities.put(
|
||||||
badEntities.put("ü", "\u00FC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
|
"ù",
|
||||||
badEntities.put("ý", "\u00FD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
|
"\u00F9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
|
||||||
badEntities.put("þ", "\u00FE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
|
badEntities.put(
|
||||||
badEntities.put("ÿ", "\u00FF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
|
"ú",
|
||||||
|
"\u00FA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
|
||||||
|
badEntities.put(
|
||||||
|
"û",
|
||||||
|
"\u00FB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
|
||||||
|
badEntities.put(
|
||||||
|
"ü",
|
||||||
|
"\u00FC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
|
||||||
|
badEntities.put(
|
||||||
|
"ý",
|
||||||
|
"\u00FD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
|
||||||
|
badEntities.put(
|
||||||
|
"þ", "\u00FE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
|
||||||
|
badEntities.put(
|
||||||
|
"ÿ",
|
||||||
|
"\u00FF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* For each entity in the input that is not allowed in XML, replace the entity with its unicode equivalent or remove it. For each
|
* For each entity in the input that is not allowed in XML, replace the entity with its unicode
|
||||||
* instance of a bare {@literal &}, replace it with {@literal &<br/>
|
* equivalent or remove it. For each instance of a bare {@literal &}, replace it with {@literal
|
||||||
* } XML only allows 4 entities: {@literal &amp;}, {@literal &quot;}, {@literal &lt;} and {@literal &gt;}.
|
* &<br/> } XML only allows 4 entities: {@literal &amp;}, {@literal &quot;},
|
||||||
|
* {@literal &lt;} and {@literal &gt;}.
|
||||||
*
|
*
|
||||||
* @param broken
|
* @param broken the string to handle entities
|
||||||
* the string to handle entities
|
|
||||||
* @return the string with entities appropriately fixed up
|
* @return the string with entities appropriately fixed up
|
||||||
*/
|
*/
|
||||||
static public String cleanAllEntities(final String broken) {
|
public static String cleanAllEntities(final String broken) {
|
||||||
if (broken == null) { return null; }
|
if (broken == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
String working = invalidControlCharPattern.matcher(broken).replaceAll("");
|
String working = invalidControlCharPattern.matcher(broken).replaceAll("");
|
||||||
working = invalidCharacterPattern.matcher(working).replaceAll("");
|
working = invalidCharacterPattern.matcher(working).replaceAll("");
|
||||||
|
@ -206,7 +334,10 @@ public class XmlCleaner {
|
||||||
int i = amp + 1;
|
int i = amp + 1;
|
||||||
while (true) {
|
while (true) {
|
||||||
// if we are at the end of the string then just escape the '&';
|
// if we are at the end of the string then just escape the '&';
|
||||||
if (i >= working.length()) { return working.substring(0, amp) + "&" + working.substring(amp + 1); //$NON-NLS-1$
|
if (i >= working.length()) {
|
||||||
|
return working.substring(0, amp)
|
||||||
|
+ "&"
|
||||||
|
+ working.substring(amp + 1); // $NON-NLS-1$
|
||||||
}
|
}
|
||||||
// if we have come to a ; then we have an entity
|
// if we have come to a ; then we have an entity
|
||||||
// If it is something that xml can't handle then replace it.
|
// If it is something that xml can't handle then replace it.
|
||||||
|
@ -220,7 +351,10 @@ public class XmlCleaner {
|
||||||
// Did we end an entity without finding a closing ;
|
// Did we end an entity without finding a closing ;
|
||||||
// Then treat it as an '&' that needs to be replaced with &
|
// Then treat it as an '&' that needs to be replaced with &
|
||||||
if (!Character.isLetterOrDigit(c)) {
|
if (!Character.isLetterOrDigit(c)) {
|
||||||
working = working.substring(0, amp) + "&" + working.substring(amp + 1); //$NON-NLS-1$
|
working =
|
||||||
|
working.substring(0, amp)
|
||||||
|
+ "&"
|
||||||
|
+ working.substring(amp + 1); // $NON-NLS-1$
|
||||||
amp = i + 4; // account for the 4 extra characters
|
amp = i + 4; // account for the 4 extra characters
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -241,18 +375,22 @@ public class XmlCleaner {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Replace entity with its unicode equivalent, if it is not a valid XML entity. Otherwise strip it out. XML only allows 4 entities:
|
* Replace entity with its unicode equivalent, if it is not a valid XML entity. Otherwise strip
|
||||||
* &amp;, &quot;, &lt; and &gt;.
|
* it out. XML only allows 4 entities: &amp;, &quot;, &lt; and &gt;.
|
||||||
*
|
*
|
||||||
* @param entity
|
* @param entity the entity to be replaced
|
||||||
* the entity to be replaced
|
* @return the substitution for the entity, either itself, the unicode equivalent or an empty
|
||||||
* @return the substitution for the entity, either itself, the unicode equivalent or an empty string.
|
* string.
|
||||||
*/
|
*/
|
||||||
private static String handleEntity(final String entity) {
|
private static String handleEntity(final String entity) {
|
||||||
if (goodEntities.contains(entity)) { return entity; }
|
if (goodEntities.contains(entity)) {
|
||||||
|
return entity;
|
||||||
|
}
|
||||||
|
|
||||||
final String replace = badEntities.get(entity);
|
final String replace = badEntities.get(entity);
|
||||||
if (replace != null) { return replace; }
|
if (replace != null) {
|
||||||
|
return replace;
|
||||||
|
}
|
||||||
|
|
||||||
return replace != null ? replace : "";
|
return replace != null ? replace : "";
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package eu.dnetlib.dhp.migration.actions;
|
package eu.dnetlib.dhp.migration.actions;
|
||||||
|
|
||||||
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
|
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
|
||||||
|
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
public class LicenseComparator implements Comparator<Qualifier> {
|
public class LicenseComparator implements Comparator<Qualifier> {
|
||||||
|
@ -45,5 +44,4 @@ public class LicenseComparator implements Comparator<Qualifier> {
|
||||||
// Else (but unlikely), lexicographical ordering will do.
|
// Else (but unlikely), lexicographical ordering will do.
|
||||||
return lClass.compareTo(rClass);
|
return lClass.compareTo(rClass);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,11 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
|
@ -17,12 +22,6 @@ import org.apache.hadoop.tools.DistCp;
|
||||||
import org.apache.hadoop.tools.DistCpOptions;
|
import org.apache.hadoop.tools.DistCpOptions;
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
import org.apache.hadoop.util.ToolRunner;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
public class MigrateActionSet {
|
public class MigrateActionSet {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(MigrateActionSet.class);
|
private static final Log log = LogFactory.getLog(MigrateActionSet.class);
|
||||||
|
@ -34,8 +33,10 @@ public class MigrateActionSet {
|
||||||
private static Boolean DEFAULT_TRANSFORM_ONLY = false;
|
private static Boolean DEFAULT_TRANSFORM_ONLY = false;
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser =
|
||||||
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
|
new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(
|
||||||
|
MigrateActionSet.class.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
|
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
@ -63,10 +64,12 @@ public class MigrateActionSet {
|
||||||
|
|
||||||
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
|
||||||
Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
|
Configuration conf =
|
||||||
|
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
|
||||||
FileSystem targetFS = FileSystem.get(conf);
|
FileSystem targetFS = FileSystem.get(conf);
|
||||||
|
|
||||||
Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
|
Configuration sourceConf =
|
||||||
|
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
|
||||||
sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
|
sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
|
||||||
FileSystem sourceFS = FileSystem.get(sourceConf);
|
FileSystem sourceFS = FileSystem.get(sourceConf);
|
||||||
|
|
||||||
|
@ -75,14 +78,20 @@ public class MigrateActionSet {
|
||||||
List<Path> targetPaths = new ArrayList<>();
|
List<Path> targetPaths = new ArrayList<>();
|
||||||
|
|
||||||
final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
|
final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
|
||||||
log.info(String.format("paths to process:\n%s", sourcePaths.stream().map(p -> p.toString()).collect(Collectors.joining("\n"))));
|
log.info(
|
||||||
|
String.format(
|
||||||
|
"paths to process:\n%s",
|
||||||
|
sourcePaths.stream()
|
||||||
|
.map(p -> p.toString())
|
||||||
|
.collect(Collectors.joining("\n"))));
|
||||||
for (Path source : sourcePaths) {
|
for (Path source : sourcePaths) {
|
||||||
|
|
||||||
if (!sourceFS.exists(source)) {
|
if (!sourceFS.exists(source)) {
|
||||||
log.warn(String.format("skipping unexisting path: %s", source));
|
log.warn(String.format("skipping unexisting path: %s", source));
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
|
LinkedList<String> pathQ =
|
||||||
|
Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
|
||||||
|
|
||||||
final String rawSet = pathQ.pollLast();
|
final String rawSet = pathQ.pollLast();
|
||||||
log.info(String.format("got RAWSET: %s", rawSet));
|
log.info(String.format("got RAWSET: %s", rawSet));
|
||||||
|
@ -91,7 +100,14 @@ public class MigrateActionSet {
|
||||||
|
|
||||||
final String actionSetDirectory = pathQ.pollLast();
|
final String actionSetDirectory = pathQ.pollLast();
|
||||||
|
|
||||||
final Path targetPath = new Path(targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);
|
final Path targetPath =
|
||||||
|
new Path(
|
||||||
|
targetNN
|
||||||
|
+ workDir
|
||||||
|
+ SEPARATOR
|
||||||
|
+ actionSetDirectory
|
||||||
|
+ SEPARATOR
|
||||||
|
+ rawSet);
|
||||||
|
|
||||||
log.info(String.format("using TARGET PATH: %s", targetPath));
|
log.info(String.format("using TARGET PATH: %s", targetPath));
|
||||||
|
|
||||||
|
@ -99,7 +115,13 @@ public class MigrateActionSet {
|
||||||
if (targetFS.exists(targetPath)) {
|
if (targetFS.exists(targetPath)) {
|
||||||
targetFS.delete(targetPath, true);
|
targetFS.delete(targetPath, true);
|
||||||
}
|
}
|
||||||
runDistcp(distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
|
runDistcp(
|
||||||
|
distcp_num_maps,
|
||||||
|
distcp_memory_mb,
|
||||||
|
distcp_task_timeout,
|
||||||
|
conf,
|
||||||
|
source,
|
||||||
|
targetPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
targetPaths.add(targetPath);
|
targetPaths.add(targetPath);
|
||||||
|
@ -107,10 +129,9 @@ public class MigrateActionSet {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
props.setProperty(TARGET_PATHS, targetPaths
|
props.setProperty(
|
||||||
.stream()
|
TARGET_PATHS,
|
||||||
.map(p -> p.toString())
|
targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(",")));
|
||||||
.collect(Collectors.joining(",")));
|
|
||||||
File file = new File(System.getProperty("oozie.action.output.properties"));
|
File file = new File(System.getProperty("oozie.action.output.properties"));
|
||||||
|
|
||||||
try (OutputStream os = new FileOutputStream(file)) {
|
try (OutputStream os = new FileOutputStream(file)) {
|
||||||
|
@ -119,7 +140,14 @@ public class MigrateActionSet {
|
||||||
System.out.println(file.getAbsolutePath());
|
System.out.println(file.getAbsolutePath());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void runDistcp(Integer distcp_num_maps, String distcp_memory_mb, String distcp_task_timeout, Configuration conf, Path source, Path targetPath) throws Exception {
|
private void runDistcp(
|
||||||
|
Integer distcp_num_maps,
|
||||||
|
String distcp_memory_mb,
|
||||||
|
String distcp_task_timeout,
|
||||||
|
Configuration conf,
|
||||||
|
Path source,
|
||||||
|
Path targetPath)
|
||||||
|
throws Exception {
|
||||||
|
|
||||||
final DistCpOptions op = new DistCpOptions(source, targetPath);
|
final DistCpOptions op = new DistCpOptions(source, targetPath);
|
||||||
op.setMaxMaps(distcp_num_maps);
|
op.setMaxMaps(distcp_num_maps);
|
||||||
|
@ -127,20 +155,25 @@ public class MigrateActionSet {
|
||||||
op.preserve(DistCpOptions.FileAttribute.REPLICATION);
|
op.preserve(DistCpOptions.FileAttribute.REPLICATION);
|
||||||
op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
|
op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
|
||||||
|
|
||||||
int res = ToolRunner.run(new DistCp(conf, op), new String[]{
|
int res =
|
||||||
|
ToolRunner.run(
|
||||||
|
new DistCp(conf, op),
|
||||||
|
new String[] {
|
||||||
"-Dmapred.task.timeout=" + distcp_task_timeout,
|
"-Dmapred.task.timeout=" + distcp_task_timeout,
|
||||||
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
|
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
|
||||||
"-pb",
|
"-pb",
|
||||||
"-m " + distcp_num_maps,
|
"-m " + distcp_num_maps,
|
||||||
source.toString(),
|
source.toString(),
|
||||||
targetPath.toString()});
|
targetPath.toString()
|
||||||
|
});
|
||||||
|
|
||||||
if (res != 0) {
|
if (res != 0) {
|
||||||
throw new RuntimeException(String.format("distcp exited with code %s", res));
|
throw new RuntimeException(String.format("distcp exited with code %s", res));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Configuration getConfiguration(String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
|
private Configuration getConfiguration(
|
||||||
|
String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
|
||||||
final Configuration conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
|
conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
|
||||||
conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
|
conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
|
||||||
|
@ -151,20 +184,20 @@ public class MigrateActionSet {
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp) throws ISLookUpException {
|
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
|
||||||
String XQUERY = "distinct-values(\n" +
|
throws ISLookUpException {
|
||||||
"let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n" +
|
String XQUERY =
|
||||||
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n" +
|
"distinct-values(\n"
|
||||||
"let $setDir := $x//SET/@directory/string()\n" +
|
+ "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
|
||||||
"let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n" +
|
+ "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
|
||||||
"return concat($basePath, '/', $setDir, '/', $rawSet))";
|
+ "let $setDir := $x//SET/@directory/string()\n"
|
||||||
|
+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
|
||||||
|
+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
|
||||||
|
|
||||||
log.info(String.format("running xquery:\n%s", XQUERY));
|
log.info(String.format("running xquery:\n%s", XQUERY));
|
||||||
return isLookUp.quickSearchProfile(XQUERY)
|
return isLookUp.quickSearchProfile(XQUERY).stream()
|
||||||
.stream()
|
|
||||||
.map(p -> sourceNN + p)
|
.map(p -> sourceNN + p)
|
||||||
.map(Path::new)
|
.map(Path::new)
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,12 +4,11 @@ import com.google.common.collect.Lists;
|
||||||
import com.googlecode.protobuf.format.JsonFormat;
|
import com.googlecode.protobuf.format.JsonFormat;
|
||||||
import eu.dnetlib.data.proto.*;
|
import eu.dnetlib.data.proto.*;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
public class ProtoConverter implements Serializable {
|
public class ProtoConverter implements Serializable {
|
||||||
|
|
||||||
|
@ -42,10 +41,12 @@ public class ProtoConverter implements Serializable {
|
||||||
rel.setRelType(r.getRelType().toString());
|
rel.setRelType(r.getRelType().toString());
|
||||||
rel.setSubRelType(r.getSubRelType().toString());
|
rel.setSubRelType(r.getSubRelType().toString());
|
||||||
rel.setRelClass(r.getRelClass());
|
rel.setRelClass(r.getRelClass());
|
||||||
rel.setCollectedFrom(r.getCollectedfromCount() > 0 ?
|
rel.setCollectedFrom(
|
||||||
r.getCollectedfromList().stream()
|
r.getCollectedfromCount() > 0
|
||||||
|
? r.getCollectedfromList().stream()
|
||||||
.map(kv -> mapKV(kv))
|
.map(kv -> mapKV(kv))
|
||||||
.collect(Collectors.toList()) : null);
|
.collect(Collectors.toList())
|
||||||
|
: null);
|
||||||
return rel;
|
return rel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,8 +72,7 @@ public class ProtoConverter implements Serializable {
|
||||||
|
|
||||||
final ResultProtos.Result r = oaf.getEntity().getResult();
|
final ResultProtos.Result r = oaf.getEntity().getResult();
|
||||||
if (r.getInstanceCount() > 0) {
|
if (r.getInstanceCount() > 0) {
|
||||||
return r.getInstanceList()
|
return r.getInstanceList().stream()
|
||||||
.stream()
|
|
||||||
.map(i -> convertInstance(i))
|
.map(i -> convertInstance(i))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
@ -96,13 +96,14 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Organization convertOrganization(OafProtos.Oaf oaf) {
|
private static Organization convertOrganization(OafProtos.Oaf oaf) {
|
||||||
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
|
final OrganizationProtos.Organization.Metadata m =
|
||||||
|
oaf.getEntity().getOrganization().getMetadata();
|
||||||
final Organization org = setOaf(new Organization(), oaf);
|
final Organization org = setOaf(new Organization(), oaf);
|
||||||
setEntity(org, oaf);
|
setEntity(org, oaf);
|
||||||
org.setLegalshortname(mapStringField(m.getLegalshortname()));
|
org.setLegalshortname(mapStringField(m.getLegalshortname()));
|
||||||
org.setLegalname(mapStringField(m.getLegalname()));
|
org.setLegalname(mapStringField(m.getLegalname()));
|
||||||
org.setAlternativeNames(m.getAlternativeNamesList().
|
org.setAlternativeNames(
|
||||||
stream()
|
m.getAlternativeNamesList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
||||||
|
@ -112,7 +113,8 @@ public class ProtoConverter implements Serializable {
|
||||||
org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
|
org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
|
||||||
org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
|
org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
|
||||||
org.setEchighereducation(mapStringField(m.getEchighereducation()));
|
org.setEchighereducation(mapStringField(m.getEchighereducation()));
|
||||||
org.setEcinternationalorganizationeurinterests(mapStringField(m.getEcinternationalorganizationeurinterests()));
|
org.setEcinternationalorganizationeurinterests(
|
||||||
|
mapStringField(m.getEcinternationalorganizationeurinterests()));
|
||||||
org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
|
org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
|
||||||
org.setEcenterprise(mapStringField(m.getEcenterprise()));
|
org.setEcenterprise(mapStringField(m.getEcenterprise()));
|
||||||
org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
|
org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
|
||||||
|
@ -123,11 +125,12 @@ public class ProtoConverter implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Datasource convertDataSource(OafProtos.Oaf oaf) {
|
private static Datasource convertDataSource(OafProtos.Oaf oaf) {
|
||||||
final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
|
final DatasourceProtos.Datasource.Metadata m =
|
||||||
|
oaf.getEntity().getDatasource().getMetadata();
|
||||||
final Datasource datasource = setOaf(new Datasource(), oaf);
|
final Datasource datasource = setOaf(new Datasource(), oaf);
|
||||||
setEntity(datasource, oaf);
|
setEntity(datasource, oaf);
|
||||||
datasource.setAccessinfopackage(m.getAccessinfopackageList()
|
datasource.setAccessinfopackage(
|
||||||
.stream()
|
m.getAccessinfopackageList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
datasource.setCertificates(mapStringField(m.getCertificates()));
|
datasource.setCertificates(mapStringField(m.getCertificates()));
|
||||||
|
@ -148,12 +151,12 @@ public class ProtoConverter implements Serializable {
|
||||||
datasource.setLogourl(mapStringField(m.getLogourl()));
|
datasource.setLogourl(mapStringField(m.getLogourl()));
|
||||||
datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
|
datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
|
||||||
datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
|
datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
|
||||||
datasource.setOdcontenttypes(m.getOdcontenttypesList()
|
datasource.setOdcontenttypes(
|
||||||
.stream()
|
m.getOdcontenttypesList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
datasource.setOdlanguages(m.getOdlanguagesList()
|
datasource.setOdlanguages(
|
||||||
.stream()
|
m.getOdlanguagesList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
|
datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
|
||||||
|
@ -162,23 +165,22 @@ public class ProtoConverter implements Serializable {
|
||||||
datasource.setOfficialname(mapStringField(m.getOfficialname()));
|
datasource.setOfficialname(mapStringField(m.getOfficialname()));
|
||||||
datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
|
datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
|
||||||
datasource.setPidsystems(mapStringField(m.getPidsystems()));
|
datasource.setPidsystems(mapStringField(m.getPidsystems()));
|
||||||
datasource.setPolicies(m.getPoliciesList()
|
datasource.setPolicies(
|
||||||
.stream()
|
m.getPoliciesList().stream()
|
||||||
.map(ProtoConverter::mapKV)
|
.map(ProtoConverter::mapKV)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
|
datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
|
||||||
datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
|
datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
|
||||||
datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
|
datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
|
||||||
datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
|
datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
|
||||||
datasource.setSubjects(m.getSubjectsList()
|
datasource.setSubjects(
|
||||||
.stream()
|
m.getSubjectsList().stream()
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
.map(ProtoConverter::mapStructuredProperty)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
datasource.setVersioning(mapBoolField(m.getVersioning()));
|
datasource.setVersioning(mapBoolField(m.getVersioning()));
|
||||||
datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
||||||
datasource.setJournal(mapJournal(m.getJournal()));
|
datasource.setJournal(mapJournal(m.getJournal()));
|
||||||
|
|
||||||
|
|
||||||
return datasource;
|
return datasource;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -204,12 +206,14 @@ public class ProtoConverter implements Serializable {
|
||||||
project.setFundedamount(m.getFundedamount());
|
project.setFundedamount(m.getFundedamount());
|
||||||
project.setTotalcost(m.getTotalcost());
|
project.setTotalcost(m.getTotalcost());
|
||||||
project.setKeywords(mapStringField(m.getKeywords()));
|
project.setKeywords(mapStringField(m.getKeywords()));
|
||||||
project.setSubjects(m.getSubjectsList().stream()
|
project.setSubjects(
|
||||||
|
m.getSubjectsList().stream()
|
||||||
.map(sp -> mapStructuredProperty(sp))
|
.map(sp -> mapStructuredProperty(sp))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
project.setTitle(mapStringField(m.getTitle()));
|
project.setTitle(mapStringField(m.getTitle()));
|
||||||
project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
||||||
project.setFundingtree(m.getFundingtreeList().stream()
|
project.setFundingtree(
|
||||||
|
m.getFundingtreeList().stream()
|
||||||
.map(f -> mapStringField(f))
|
.map(f -> mapStringField(f))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
|
project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
|
||||||
|
@ -242,12 +246,12 @@ public class ProtoConverter implements Serializable {
|
||||||
setEntity(software, oaf);
|
setEntity(software, oaf);
|
||||||
setResult(software, oaf);
|
setResult(software, oaf);
|
||||||
|
|
||||||
software.setDocumentationUrl(m.getDocumentationUrlList()
|
software.setDocumentationUrl(
|
||||||
.stream()
|
m.getDocumentationUrlList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
software.setLicense(m.getLicenseList()
|
software.setLicense(
|
||||||
.stream()
|
m.getLicenseList().stream()
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
.map(ProtoConverter::mapStructuredProperty)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
|
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
|
||||||
|
@ -260,16 +264,16 @@ public class ProtoConverter implements Serializable {
|
||||||
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
|
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
|
||||||
setEntity(otherResearchProducts, oaf);
|
setEntity(otherResearchProducts, oaf);
|
||||||
setResult(otherResearchProducts, oaf);
|
setResult(otherResearchProducts, oaf);
|
||||||
otherResearchProducts.setContactperson(m.getContactpersonList()
|
otherResearchProducts.setContactperson(
|
||||||
.stream()
|
m.getContactpersonList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
otherResearchProducts.setContactgroup(m.getContactgroupList()
|
otherResearchProducts.setContactgroup(
|
||||||
.stream()
|
m.getContactgroupList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
otherResearchProducts.setTool(m.getToolList()
|
otherResearchProducts.setTool(
|
||||||
.stream()
|
m.getToolList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
|
@ -298,12 +302,11 @@ public class ProtoConverter implements Serializable {
|
||||||
dataset.setVersion(mapStringField(m.getVersion()));
|
dataset.setVersion(mapStringField(m.getVersion()));
|
||||||
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
|
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
|
||||||
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
|
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
|
||||||
dataset.setGeolocation(m.getGeolocationList()
|
dataset.setGeolocation(
|
||||||
.stream()
|
m.getGeolocationList().stream()
|
||||||
.map(ProtoConverter::mapGeolocation)
|
.map(ProtoConverter::mapGeolocation)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
return dataset;
|
return dataset;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
|
public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
|
||||||
|
@ -317,17 +320,18 @@ public class ProtoConverter implements Serializable {
|
||||||
final OafProtos.OafEntity e = oaf.getEntity();
|
final OafProtos.OafEntity e = oaf.getEntity();
|
||||||
entity.setId(e.getId());
|
entity.setId(e.getId());
|
||||||
entity.setOriginalId(e.getOriginalIdList());
|
entity.setOriginalId(e.getOriginalIdList());
|
||||||
entity.setCollectedfrom(e.getCollectedfromList()
|
entity.setCollectedfrom(
|
||||||
.stream()
|
e.getCollectedfromList().stream()
|
||||||
.map(ProtoConverter::mapKV)
|
.map(ProtoConverter::mapKV)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setPid(e.getPidList().stream()
|
entity.setPid(
|
||||||
|
e.getPidList().stream()
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
.map(ProtoConverter::mapStructuredProperty)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setDateofcollection(e.getDateofcollection());
|
entity.setDateofcollection(e.getDateofcollection());
|
||||||
entity.setDateoftransformation(e.getDateoftransformation());
|
entity.setDateoftransformation(e.getDateoftransformation());
|
||||||
entity.setExtraInfo(e.getExtraInfoList()
|
entity.setExtraInfo(
|
||||||
.stream()
|
e.getExtraInfoList().stream()
|
||||||
.map(ProtoConverter::mapExtraInfo)
|
.map(ProtoConverter::mapExtraInfo)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
return entity;
|
return entity;
|
||||||
|
@ -336,77 +340,79 @@ public class ProtoConverter implements Serializable {
|
||||||
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
|
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
|
||||||
// setting Entity fields
|
// setting Entity fields
|
||||||
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
||||||
entity.setAuthor(m.getAuthorList()
|
entity.setAuthor(
|
||||||
.stream()
|
m.getAuthorList().stream()
|
||||||
.map(ProtoConverter::mapAuthor)
|
.map(ProtoConverter::mapAuthor)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setResulttype(mapQualifier(m.getResulttype()));
|
entity.setResulttype(mapQualifier(m.getResulttype()));
|
||||||
entity.setLanguage(mapQualifier(m.getLanguage()));
|
entity.setLanguage(mapQualifier(m.getLanguage()));
|
||||||
entity.setCountry(m.getCountryList()
|
entity.setCountry(
|
||||||
.stream()
|
m.getCountryList().stream()
|
||||||
.map(ProtoConverter::mapQualifierAsCountry)
|
.map(ProtoConverter::mapQualifierAsCountry)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setSubject(m.getSubjectList()
|
entity.setSubject(
|
||||||
.stream()
|
m.getSubjectList().stream()
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
.map(ProtoConverter::mapStructuredProperty)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setTitle(m.getTitleList()
|
entity.setTitle(
|
||||||
.stream()
|
m.getTitleList().stream()
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
.map(ProtoConverter::mapStructuredProperty)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setRelevantdate(m.getRelevantdateList()
|
entity.setRelevantdate(
|
||||||
.stream()
|
m.getRelevantdateList().stream()
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
.map(ProtoConverter::mapStructuredProperty)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setDescription(m.getDescriptionList()
|
entity.setDescription(
|
||||||
.stream()
|
m.getDescriptionList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
|
entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
|
||||||
entity.setPublisher(mapStringField(m.getPublisher()));
|
entity.setPublisher(mapStringField(m.getPublisher()));
|
||||||
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
|
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
|
||||||
entity.setSource(m.getSourceList()
|
entity.setSource(
|
||||||
.stream()
|
m.getSourceList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setFulltext(m.getFulltextList()
|
entity.setFulltext(
|
||||||
.stream()
|
m.getFulltextList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setFormat(m.getFormatList()
|
entity.setFormat(
|
||||||
.stream()
|
m.getFormatList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setContributor(m.getContributorList()
|
entity.setContributor(
|
||||||
.stream()
|
m.getContributorList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setResourcetype(mapQualifier(m.getResourcetype()));
|
entity.setResourcetype(mapQualifier(m.getResourcetype()));
|
||||||
entity.setCoverage(m.getCoverageList()
|
entity.setCoverage(
|
||||||
.stream()
|
m.getCoverageList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setContext(m.getContextList()
|
entity.setContext(
|
||||||
.stream()
|
m.getContextList().stream()
|
||||||
.map(ProtoConverter::mapContext)
|
.map(ProtoConverter::mapContext)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
||||||
entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
|
entity.setBestaccessright(
|
||||||
|
getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
|
||||||
|
|
||||||
return entity;
|
return entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
|
private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
|
||||||
if (instanceList != null) {
|
if (instanceList != null) {
|
||||||
final Optional<FieldTypeProtos.Qualifier> min = instanceList.stream()
|
final Optional<FieldTypeProtos.Qualifier> min =
|
||||||
.map(i -> i.getAccessright()).min(new LicenseComparator());
|
instanceList.stream().map(i -> i.getAccessright()).min(new LicenseComparator());
|
||||||
|
|
||||||
final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
|
final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
|
||||||
|
|
||||||
if (StringUtils.isBlank(rights.getClassid())) {
|
if (StringUtils.isBlank(rights.getClassid())) {
|
||||||
rights.setClassid(UNKNOWN);
|
rights.setClassid(UNKNOWN);
|
||||||
}
|
}
|
||||||
if (StringUtils.isBlank(rights.getClassname()) || UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
if (StringUtils.isBlank(rights.getClassname())
|
||||||
|
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
||||||
rights.setClassname(NOT_AVAILABLE);
|
rights.setClassname(NOT_AVAILABLE);
|
||||||
}
|
}
|
||||||
if (StringUtils.isBlank(rights.getSchemeid())) {
|
if (StringUtils.isBlank(rights.getSchemeid())) {
|
||||||
|
@ -425,14 +431,13 @@ public class ProtoConverter implements Serializable {
|
||||||
|
|
||||||
final Context entity = new Context();
|
final Context entity = new Context();
|
||||||
entity.setId(context.getId());
|
entity.setId(context.getId());
|
||||||
entity.setDataInfo(context.getDataInfoList()
|
entity.setDataInfo(
|
||||||
.stream()
|
context.getDataInfoList().stream()
|
||||||
.map(ProtoConverter::mapDataInfo)
|
.map(ProtoConverter::mapDataInfo)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
return entity;
|
return entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
||||||
final KeyValue keyValue = new KeyValue();
|
final KeyValue keyValue = new KeyValue();
|
||||||
keyValue.setKey(kv.getKey());
|
keyValue.setKey(kv.getKey());
|
||||||
|
@ -495,7 +500,8 @@ public class ProtoConverter implements Serializable {
|
||||||
return entity;
|
return entity;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OriginDescription mapOriginalDescription(FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
|
public static OriginDescription mapOriginalDescription(
|
||||||
|
FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
|
||||||
final OriginDescription originDescriptionResult = new OriginDescription();
|
final OriginDescription originDescriptionResult = new OriginDescription();
|
||||||
originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
|
originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
|
||||||
originDescriptionResult.setAltered(originDescription.getAltered());
|
originDescriptionResult.setAltered(originDescription.getAltered());
|
||||||
|
@ -550,9 +556,10 @@ public class ProtoConverter implements Serializable {
|
||||||
entity.setName(author.getName());
|
entity.setName(author.getName());
|
||||||
entity.setSurname(author.getSurname());
|
entity.setSurname(author.getSurname());
|
||||||
entity.setRank(author.getRank());
|
entity.setRank(author.getRank());
|
||||||
entity.setPid(author.getPidList()
|
entity.setPid(
|
||||||
.stream()
|
author.getPidList().stream()
|
||||||
.map(kv -> {
|
.map(
|
||||||
|
kv -> {
|
||||||
final StructuredProperty sp = new StructuredProperty();
|
final StructuredProperty sp = new StructuredProperty();
|
||||||
sp.setValue(kv.getValue());
|
sp.setValue(kv.getValue());
|
||||||
final Qualifier q = new Qualifier();
|
final Qualifier q = new Qualifier();
|
||||||
|
@ -562,12 +569,11 @@ public class ProtoConverter implements Serializable {
|
||||||
return sp;
|
return sp;
|
||||||
})
|
})
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
entity.setAffiliation(author.getAffiliationList()
|
entity.setAffiliation(
|
||||||
.stream()
|
author.getAffiliationList().stream()
|
||||||
.map(ProtoConverter::mapStringField)
|
.map(ProtoConverter::mapStringField)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
return entity;
|
return entity;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {
|
public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {
|
||||||
|
|
|
@ -5,14 +5,17 @@ import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.protobuf.InvalidProtocolBufferException;
|
import com.google.protobuf.InvalidProtocolBufferException;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
|
||||||
import eu.dnetlib.data.proto.OafProtos;
|
import eu.dnetlib.data.proto.OafProtos;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
|
@ -21,16 +24,9 @@ import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.io.compress.GzipCodec;
|
import org.apache.hadoop.io.compress.GzipCodec;
|
||||||
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
|
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
import org.apache.spark.api.java.JavaSparkContext;
|
||||||
import org.apache.spark.sql.SparkSession;
|
import org.apache.spark.sql.SparkSession;
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
public class TransformActions implements Serializable {
|
public class TransformActions implements Serializable {
|
||||||
|
|
||||||
|
@ -38,8 +34,10 @@ public class TransformActions implements Serializable {
|
||||||
private static final String SEPARATOR = "/";
|
private static final String SEPARATOR = "/";
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser =
|
||||||
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
|
new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(
|
||||||
|
MigrateActionSet.class.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
|
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
@ -66,12 +64,19 @@ public class TransformActions implements Serializable {
|
||||||
|
|
||||||
for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
|
for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
|
||||||
|
|
||||||
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
|
LinkedList<String> pathQ =
|
||||||
|
Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
|
||||||
|
|
||||||
final String rawset = pathQ.pollLast();
|
final String rawset = pathQ.pollLast();
|
||||||
final String actionSetDirectory = pathQ.pollLast();
|
final String actionSetDirectory = pathQ.pollLast();
|
||||||
|
|
||||||
final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);
|
final Path targetDirectory =
|
||||||
|
new Path(
|
||||||
|
targetBaseDir
|
||||||
|
+ SEPARATOR
|
||||||
|
+ actionSetDirectory
|
||||||
|
+ SEPARATOR
|
||||||
|
+ rawset);
|
||||||
|
|
||||||
if (fs.exists(targetDirectory)) {
|
if (fs.exists(targetDirectory)) {
|
||||||
log.info(String.format("found target directory '%s", targetDirectory));
|
log.info(String.format("found target directory '%s", targetDirectory));
|
||||||
|
@ -79,10 +84,16 @@ public class TransformActions implements Serializable {
|
||||||
log.info(String.format("deleted target directory '%s", targetDirectory));
|
log.info(String.format("deleted target directory '%s", targetDirectory));
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info(String.format("transforming actions from '%s' to '%s'", sourcePath, targetDirectory));
|
log.info(
|
||||||
|
String.format(
|
||||||
|
"transforming actions from '%s' to '%s'",
|
||||||
|
sourcePath, targetDirectory));
|
||||||
|
|
||||||
sc.sequenceFile(sourcePath, Text.class, Text.class)
|
sc.sequenceFile(sourcePath, Text.class, Text.class)
|
||||||
.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString()))
|
.map(
|
||||||
|
a ->
|
||||||
|
eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(
|
||||||
|
a._2().toString()))
|
||||||
.map(a -> doTransform(a))
|
.map(a -> doTransform(a))
|
||||||
.filter(Objects::isNull)
|
.filter(Objects::isNull)
|
||||||
.filter(a -> a.getPayload() == null)
|
.filter(a -> a.getPayload() == null)
|
||||||
|
@ -92,7 +103,8 @@ public class TransformActions implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException {
|
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa)
|
||||||
|
throws InvalidProtocolBufferException, JsonProcessingException {
|
||||||
final Text out = new Text();
|
final Text out = new Text();
|
||||||
final ObjectMapper mapper = new ObjectMapper();
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
|
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
|
||||||
|
@ -135,7 +147,8 @@ public class TransformActions implements Serializable {
|
||||||
return new AtomicAction<>(Relation.class, rel);
|
return new AtomicAction<>(Relation.class, rel);
|
||||||
}
|
}
|
||||||
|
|
||||||
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException {
|
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
|
||||||
|
throws InvalidProtocolBufferException {
|
||||||
final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
|
final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
|
||||||
final Oaf oaf = ProtoConverter.convert(proto_oaf);
|
final Oaf oaf = ProtoConverter.convert(proto_oaf);
|
||||||
switch (proto_oaf.getKind()) {
|
switch (proto_oaf.getKind()) {
|
||||||
|
@ -148,14 +161,21 @@ public class TransformActions implements Serializable {
|
||||||
case project:
|
case project:
|
||||||
return new AtomicAction<>(Project.class, (Project) oaf);
|
return new AtomicAction<>(Project.class, (Project) oaf);
|
||||||
case result:
|
case result:
|
||||||
final String resulttypeid = proto_oaf.getEntity().getResult().getMetadata().getResulttype().getClassid();
|
final String resulttypeid =
|
||||||
|
proto_oaf
|
||||||
|
.getEntity()
|
||||||
|
.getResult()
|
||||||
|
.getMetadata()
|
||||||
|
.getResulttype()
|
||||||
|
.getClassid();
|
||||||
switch (resulttypeid) {
|
switch (resulttypeid) {
|
||||||
case "publication":
|
case "publication":
|
||||||
return new AtomicAction<>(Publication.class, (Publication) oaf);
|
return new AtomicAction<>(Publication.class, (Publication) oaf);
|
||||||
case "software":
|
case "software":
|
||||||
return new AtomicAction<>(Software.class, (Software) oaf);
|
return new AtomicAction<>(Software.class, (Software) oaf);
|
||||||
case "other":
|
case "other":
|
||||||
return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf);
|
return new AtomicAction<>(
|
||||||
|
OtherResearchProduct.class, (OtherResearchProduct) oaf);
|
||||||
case "dataset":
|
case "dataset":
|
||||||
return new AtomicAction<>(Dataset.class, (Dataset) oaf);
|
return new AtomicAction<>(Dataset.class, (Dataset) oaf);
|
||||||
default:
|
default:
|
||||||
|
@ -163,7 +183,8 @@ public class TransformActions implements Serializable {
|
||||||
return new AtomicAction<>(Result.class, (Result) oaf);
|
return new AtomicAction<>(Result.class, (Result) oaf);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
throw new IllegalArgumentException("invalid entity type: " + proto_oaf.getEntity().getType());
|
throw new IllegalArgumentException(
|
||||||
|
"invalid entity type: " + proto_oaf.getEntity().getType());
|
||||||
}
|
}
|
||||||
case relation:
|
case relation:
|
||||||
return new AtomicAction<>(Relation.class, (Relation) oaf);
|
return new AtomicAction<>(Relation.class, (Relation) oaf);
|
||||||
|
@ -174,15 +195,15 @@ public class TransformActions implements Serializable {
|
||||||
|
|
||||||
private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
|
private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
|
||||||
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
|
String XQUERY =
|
||||||
|
"collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
|
||||||
return isLookUp.getResourceProfileByQuery(XQUERY);
|
return isLookUp.getResourceProfileByQuery(XQUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
|
private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
return SparkSession
|
return SparkSession.builder()
|
||||||
.builder()
|
|
||||||
.appName(TransformActions.class.getSimpleName())
|
.appName(TransformActions.class.getSimpleName())
|
||||||
.master(parser.get("master"))
|
.master(parser.get("master"))
|
||||||
.config(conf)
|
.config(conf)
|
||||||
|
|
|
@ -3,29 +3,32 @@ package eu.dnetlib.dhp.transformation;
|
||||||
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
|
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
|
||||||
import eu.dnetlib.dhp.transformation.functions.Cleaner;
|
import eu.dnetlib.dhp.transformation.functions.Cleaner;
|
||||||
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
|
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.util.Map;
|
||||||
|
import javax.xml.transform.stream.StreamSource;
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.util.LongAccumulator;
|
import org.apache.spark.util.LongAccumulator;
|
||||||
|
|
||||||
import javax.xml.transform.stream.StreamSource;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
public class TransformFunction implements MapFunction<MetadataRecord, MetadataRecord> {
|
public class TransformFunction implements MapFunction<MetadataRecord, MetadataRecord> {
|
||||||
|
|
||||||
|
|
||||||
private final LongAccumulator totalItems;
|
private final LongAccumulator totalItems;
|
||||||
private final LongAccumulator errorItems;
|
private final LongAccumulator errorItems;
|
||||||
private final LongAccumulator transformedItems;
|
private final LongAccumulator transformedItems;
|
||||||
private final String transformationRule;
|
private final String transformationRule;
|
||||||
private final Cleaner cleanFunction;
|
private final Cleaner cleanFunction;
|
||||||
|
|
||||||
|
|
||||||
private final long dateOfTransformation;
|
private final long dateOfTransformation;
|
||||||
|
|
||||||
|
public TransformFunction(
|
||||||
public TransformFunction(LongAccumulator totalItems, LongAccumulator errorItems, LongAccumulator transformedItems, final String transformationRule, long dateOfTransformation, final Map<String, Vocabulary> vocabularies) throws Exception {
|
LongAccumulator totalItems,
|
||||||
|
LongAccumulator errorItems,
|
||||||
|
LongAccumulator transformedItems,
|
||||||
|
final String transformationRule,
|
||||||
|
long dateOfTransformation,
|
||||||
|
final Map<String, Vocabulary> vocabularies)
|
||||||
|
throws Exception {
|
||||||
this.totalItems = totalItems;
|
this.totalItems = totalItems;
|
||||||
this.errorItems = errorItems;
|
this.errorItems = errorItems;
|
||||||
this.transformedItems = transformedItems;
|
this.transformedItems = transformedItems;
|
||||||
|
@ -41,8 +44,16 @@ public class TransformFunction implements MapFunction<MetadataRecord, MetadataRe
|
||||||
Processor processor = new Processor(false);
|
Processor processor = new Processor(false);
|
||||||
processor.registerExtensionFunction(cleanFunction);
|
processor.registerExtensionFunction(cleanFunction);
|
||||||
final XsltCompiler comp = processor.newXsltCompiler();
|
final XsltCompiler comp = processor.newXsltCompiler();
|
||||||
XsltExecutable xslt = comp.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
|
XsltExecutable xslt =
|
||||||
XdmNode source = processor.newDocumentBuilder().build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
|
comp.compile(
|
||||||
|
new StreamSource(
|
||||||
|
new ByteArrayInputStream(transformationRule.getBytes())));
|
||||||
|
XdmNode source =
|
||||||
|
processor
|
||||||
|
.newDocumentBuilder()
|
||||||
|
.build(
|
||||||
|
new StreamSource(
|
||||||
|
new ByteArrayInputStream(value.getBody().getBytes())));
|
||||||
XsltTransformer trans = xslt.load();
|
XsltTransformer trans = xslt.load();
|
||||||
trans.setInitialContextNode(source);
|
trans.setInitialContextNode(source);
|
||||||
final StringWriter output = new StringWriter();
|
final StringWriter output = new StringWriter();
|
||||||
|
@ -61,7 +72,4 @@ public class TransformFunction implements MapFunction<MetadataRecord, MetadataRe
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,7 +1,6 @@
|
||||||
package eu.dnetlib.dhp.transformation;
|
package eu.dnetlib.dhp.transformation;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob;
|
|
||||||
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
|
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
|
||||||
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
|
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
|
||||||
import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
|
import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
|
||||||
|
@ -9,6 +8,10 @@ import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import eu.dnetlib.message.Message;
|
import eu.dnetlib.message.Message;
|
||||||
import eu.dnetlib.message.MessageManager;
|
import eu.dnetlib.message.MessageManager;
|
||||||
import eu.dnetlib.message.MessageType;
|
import eu.dnetlib.message.MessageType;
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.commons.cli.*;
|
import org.apache.commons.cli.*;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
@ -21,52 +24,57 @@ import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
public class TransformSparkJobNode {
|
public class TransformSparkJobNode {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(TransformSparkJobNode.class.getResourceAsStream("/eu/dnetlib/dhp/transformation/transformation_input_parameters.json")));
|
final ArgumentApplicationParser parser =
|
||||||
|
new ArgumentApplicationParser(
|
||||||
|
IOUtils.toString(
|
||||||
|
TransformSparkJobNode.class.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/transformation/transformation_input_parameters.json")));
|
||||||
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final String inputPath = parser.get("input");
|
final String inputPath = parser.get("input");
|
||||||
final String outputPath = parser.get("output");
|
final String outputPath = parser.get("output");
|
||||||
final String workflowId = parser.get("workflowId");
|
final String workflowId = parser.get("workflowId");
|
||||||
final String trasformationRule = extractXSLTFromTR(Objects.requireNonNull(DHPUtils.decompressString(parser.get("transformationRule"))));
|
final String trasformationRule =
|
||||||
|
extractXSLTFromTR(
|
||||||
|
Objects.requireNonNull(
|
||||||
|
DHPUtils.decompressString(parser.get("transformationRule"))));
|
||||||
final String master = parser.get("master");
|
final String master = parser.get("master");
|
||||||
final String rabbitUser = parser.get("rabbitUser");
|
final String rabbitUser = parser.get("rabbitUser");
|
||||||
final String rabbitPassword = parser.get("rabbitPassword");
|
final String rabbitPassword = parser.get("rabbitPassword");
|
||||||
final String rabbitHost = parser.get("rabbitHost");
|
final String rabbitHost = parser.get("rabbitHost");
|
||||||
final String rabbitReportQueue = parser.get("rabbitReportQueue");
|
final String rabbitReportQueue = parser.get("rabbitReportQueue");
|
||||||
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
|
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
|
||||||
final boolean test = parser.get("isTest") == null?false: Boolean.valueOf(parser.get("isTest"));
|
final boolean test =
|
||||||
|
parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
|
||||||
|
|
||||||
final SparkSession spark = SparkSession
|
final SparkSession spark =
|
||||||
.builder()
|
SparkSession.builder()
|
||||||
.appName("TransformStoreSparkJob")
|
.appName("TransformStoreSparkJob")
|
||||||
.master(master)
|
.master(master)
|
||||||
.getOrCreate();
|
.getOrCreate();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
|
||||||
final Dataset<MetadataRecord> mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder);
|
final Dataset<MetadataRecord> mdstoreInput =
|
||||||
|
spark.read().format("parquet").load(inputPath).as(encoder);
|
||||||
final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
|
final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
|
||||||
final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
|
final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
|
||||||
final LongAccumulator transformedItems = spark.sparkContext().longAccumulator("transformedItems");
|
final LongAccumulator transformedItems =
|
||||||
|
spark.sparkContext().longAccumulator("transformedItems");
|
||||||
final Map<String, Vocabulary> vocabularies = new HashMap<>();
|
final Map<String, Vocabulary> vocabularies = new HashMap<>();
|
||||||
vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
|
vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
|
||||||
final TransformFunction transformFunction = new TransformFunction(totalItems, errorItems, transformedItems, trasformationRule, dateOfCollection, vocabularies) ;
|
final TransformFunction transformFunction =
|
||||||
|
new TransformFunction(
|
||||||
|
totalItems,
|
||||||
|
errorItems,
|
||||||
|
transformedItems,
|
||||||
|
trasformationRule,
|
||||||
|
dateOfCollection,
|
||||||
|
vocabularies);
|
||||||
mdstoreInput.map(transformFunction, encoder).write().format("parquet").save(outputPath);
|
mdstoreInput.map(transformFunction, encoder).write().format("parquet").save(outputPath);
|
||||||
if (rabbitHost != null) {
|
if (rabbitHost != null) {
|
||||||
System.out.println("SEND FINAL REPORT");
|
System.out.println("SEND FINAL REPORT");
|
||||||
|
@ -76,14 +84,19 @@ public class TransformSparkJobNode {
|
||||||
reportMap.put("mdStoreSize", "" + transformedItems.value());
|
reportMap.put("mdStoreSize", "" + transformedItems.value());
|
||||||
System.out.println(new Message(workflowId, "Transform", MessageType.REPORT, reportMap));
|
System.out.println(new Message(workflowId, "Transform", MessageType.REPORT, reportMap));
|
||||||
if (!test) {
|
if (!test) {
|
||||||
final MessageManager manager = new MessageManager(rabbitHost, rabbitUser, rabbitPassword, false, false, null);
|
final MessageManager manager =
|
||||||
manager.sendMessage(new Message(workflowId, "Transform", MessageType.REPORT, reportMap), rabbitReportQueue, true, false);
|
new MessageManager(
|
||||||
|
rabbitHost, rabbitUser, rabbitPassword, false, false, null);
|
||||||
|
manager.sendMessage(
|
||||||
|
new Message(workflowId, "Transform", MessageType.REPORT, reportMap),
|
||||||
|
rabbitReportQueue,
|
||||||
|
true,
|
||||||
|
false);
|
||||||
manager.close();
|
manager.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static String extractXSLTFromTR(final String tr) throws DocumentException {
|
private static String extractXSLTFromTR(final String tr) throws DocumentException {
|
||||||
SAXReader reader = new SAXReader();
|
SAXReader reader = new SAXReader();
|
||||||
Document document = reader.read(new ByteArrayInputStream(tr.getBytes()));
|
Document document = reader.read(new ByteArrayInputStream(tr.getBytes()));
|
||||||
|
|
|
@ -2,18 +2,15 @@ package eu.dnetlib.dhp.transformation.functions;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.transformation.vocabulary.Term;
|
import eu.dnetlib.dhp.transformation.vocabulary.Term;
|
||||||
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
|
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
import scala.Serializable;
|
import scala.Serializable;
|
||||||
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
public class Cleaner implements ExtensionFunction, Serializable {
|
public class Cleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
|
|
||||||
private final Map<String, Vocabulary> vocabularies;
|
private final Map<String, Vocabulary> vocabularies;
|
||||||
|
|
||||||
|
|
||||||
public Cleaner(Map<String, Vocabulary> vocabularies) {
|
public Cleaner(Map<String, Vocabulary> vocabularies) {
|
||||||
this.vocabularies = vocabularies;
|
this.vocabularies = vocabularies;
|
||||||
}
|
}
|
||||||
|
@ -30,11 +27,9 @@ public class Cleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SequenceType[] getArgumentTypes() {
|
public SequenceType[] getArgumentTypes() {
|
||||||
return new SequenceType[]
|
return new SequenceType[] {
|
||||||
{
|
|
||||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE),
|
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE),
|
||||||
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
|
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,8 +37,12 @@ public class Cleaner implements ExtensionFunction, Serializable {
|
||||||
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
|
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
|
||||||
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
|
||||||
final String vocabularyName = xdmValues[1].itemAt(0).getStringValue();
|
final String vocabularyName = xdmValues[1].itemAt(0).getStringValue();
|
||||||
Optional<Term> cleanedValue = vocabularies.get(vocabularyName).getTerms().stream().filter(it -> it.getNativeName().equalsIgnoreCase(currentValue)).findAny();
|
Optional<Term> cleanedValue =
|
||||||
|
vocabularies.get(vocabularyName).getTerms().stream()
|
||||||
|
.filter(it -> it.getNativeName().equalsIgnoreCase(currentValue))
|
||||||
|
.findAny();
|
||||||
|
|
||||||
return new XdmAtomicValue(cleanedValue.isPresent()?cleanedValue.get().getCode():currentValue);
|
return new XdmAtomicValue(
|
||||||
|
cleanedValue.isPresent() ? cleanedValue.get().getCode() : currentValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,6 @@ public class Term implements Serializable {
|
||||||
private String code;
|
private String code;
|
||||||
private String synonyms;
|
private String synonyms;
|
||||||
|
|
||||||
|
|
||||||
public String getEnglishName() {
|
public String getEnglishName() {
|
||||||
return englishName;
|
return englishName;
|
||||||
}
|
}
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue