merged master

This commit is contained in:
Sandro La Bruzzo 2020-04-20 14:04:40 +02:00
commit b2c872cb4d
241 changed files with 15238 additions and 10022 deletions

View File

@ -3,7 +3,6 @@ package eu.dnetlib.maven.plugin.properties;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
@ -12,60 +11,63 @@ import org.apache.maven.plugin.MojoFailureException;
/**
* Generates oozie properties which were not provided from commandline.
*
* @author mhorst
* @goal generate-properties
*/
public class GenerateOoziePropertiesMojo extends AbstractMojo {
public static final String PROPERTY_NAME_WF_SOURCE_DIR = "workflow.source.dir";
public static final String PROPERTY_NAME_SANDBOX_NAME = "sandboxName";
private final String[] limiters = {"dhp", "dnetlib", "eu"};
@Override
public void execute() throws MojoExecutionException, MojoFailureException {
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName =
generateSandboxName(
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
if (generatedSandboxName != null) {
System.getProperties()
.setProperty(PROPERTY_NAME_SANDBOX_NAME, generatedSandboxName);
} else {
System.out.println(
"unable to generate sandbox name from path: "
+ System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
}
}
}
/**
* Generates sandbox name from workflow source directory.
*
* @param wfSourceDir
* @return generated sandbox name
*/
private String generateSandboxName(String wfSourceDir) {
// utilize all dir names until finding one of the limiters
List<String> sandboxNameParts = new ArrayList<String>();
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
ArrayUtils.reverse(tokens);
if (tokens.length > 0) {
for (String token : tokens) {
for (String limiter : limiters) {
if (limiter.equals(token)) {
return sandboxNameParts.size() > 0
? StringUtils.join(sandboxNameParts.toArray())
: null;
}
}
if (sandboxNameParts.size() > 0) {
sandboxNameParts.add(0, File.separator);
}
sandboxNameParts.add(0, token);
}
return StringUtils.join(sandboxNameParts.toArray());
} else {
return null;
}
}
}
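For illustration, a standalone sketch of the sandbox-name derivation implemented above (class name invented; it mirrors the limiter walk but is not part of the commit):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class SandboxNameSketch {
    private static final String[] LIMITERS = {"dhp", "dnetlib", "eu"};

    static String generate(String wfSourceDir) {
        // walk the path from the leaf upwards, collecting names until a limiter is met
        List<String> tokens = new ArrayList<>(Arrays.asList(wfSourceDir.split("/")));
        Collections.reverse(tokens);
        List<String> parts = new ArrayList<>();
        for (String token : tokens) {
            for (String limiter : LIMITERS) {
                if (limiter.equals(token)) {
                    return parts.isEmpty() ? null : String.join("", parts);
                }
            }
            if (!parts.isEmpty()) {
                parts.add(0, "/");
            }
            parts.add(0, token);
        }
        return String.join("", parts);
    }

    public static void main(String[] args) {
        // prints "wf/transformers": collection stops at the "dhp" limiter
        System.out.println(generate("eu/dnetlib/dhp/wf/transformers"));
    }
}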

View File

@ -1,19 +1,17 @@
/**
* Licensed under the Educational Community License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the License at
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>http://www.opensource.org/licenses/ecl2.php
*
* http://www.opensource.org/licenses/ecl2.php
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* <p>Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.dnetlib.maven.plugin.properties;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@ -26,7 +24,6 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
@ -38,28 +35,23 @@ import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
/**
* Writes project properties for the keys listed in specified properties files. Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
*
* @author mhorst
* @goal write-project-properties
*/
public class WritePredefinedProjectProperties extends AbstractMojo {
private static final String CR = "\r";
private static final String LF = "\n";
private static final String TAB = "\t";
protected static final String PROPERTY_PREFIX_ENV = "env.";
private static final String ENCODING_UTF8 = "utf8";
/** @parameter property="properties.includePropertyKeysFromFiles" */
private String[] includePropertyKeysFromFiles;
/**
* @parameter default-value="${project}"
@ -72,37 +64,39 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* The file that properties will be written to
*
* @parameter property="properties.outputFile"
* default-value="${project.build.directory}/properties/project.properties";
* @required
*/
protected File outputFile;
/**
* If true, the plugin will silently ignore any non-existent properties files, and the build
* will continue
*
* @parameter property="properties.quiet" default-value="true"
*/
private boolean quiet;
/**
* Comma separated list of characters to escape when writing property values. cr=carriage
* return, lf=linefeed, tab=tab. Any other values are taken literally.
*
* @parameter default-value="cr,lf,tab" property="properties.escapeChars"
*/
private String escapeChars;
/**
* If true, the plugin will include system properties when writing the properties file. System
* properties override both environment variables and project properties.
*
* @parameter default-value="false" property="properties.includeSystemProperties"
*/
private boolean includeSystemProperties;
/**
* If true, the plugin will include environment variables when writing the properties file.
* Environment variables are prefixed with "env". Environment variables override project
* properties.
*
* @parameter default-value="false" property="properties.includeEnvironmentVariables"
*/
@ -116,8 +110,8 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
private String exclude;
/**
* Comma separated set of properties to write to the properties file. If provided, only the
* properties matching those supplied here will be written to the properties file.
*
* @parameter property="properties.include"
*/
@ -127,7 +121,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* @see org.apache.maven.plugin.AbstractMojo#execute()
*/
@Override
@SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"})
@SuppressFBWarnings({"NP_UNWRITTEN_FIELD", "UWF_UNWRITTEN_FIELD"})
public void execute() throws MojoExecutionException, MojoFailureException {
Properties properties = new Properties();
// Add project properties
@ -149,10 +143,11 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
getLog().info("Creating " + outputFile);
writeProperties(outputFile, comment, properties, escapeTokens);
}
}
/**
* Provides environment variables.
*
* @return environment variables
*/
protected static Properties getEnvironmentVariables() {
@ -165,32 +160,34 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Removes properties which should not be written.
*
* @param properties
* @param omitCSV
* @param includeCSV
* @throws MojoExecutionException
*/
protected void trim(Properties properties, String omitCSV, String includeCSV)
throws MojoExecutionException {
List<String> omitKeys = getListFromCSV(omitCSV);
for (String key : omitKeys) {
properties.remove(key);
}
List<String> includeKeys = getListFromCSV(includeCSV);
// mh: including keys from predefined properties
if (includePropertyKeysFromFiles != null && includePropertyKeysFromFiles.length > 0) {
for (String currentIncludeLoc : includePropertyKeysFromFiles) {
if (validate(currentIncludeLoc)) {
Properties p = getProperties(currentIncludeLoc);
for (String key : p.stringPropertyNames()) {
includeKeys.add(key);
}
}
}
}
if (includeKeys != null && !includeKeys.isEmpty()) {
// removing only when include keys provided
Set<String> keys = properties.stringPropertyNames();
for (String key : keys) {
if (!includeKeys.contains(key)) {
properties.remove(key);
@ -201,6 +198,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Checks whether file exists.
*
* @param location
* @return true when exists, false otherwise.
*/
@ -219,6 +217,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Validates resource location.
*
* @param location
* @return true when valid, false otherwise
* @throws MojoExecutionException
@ -238,6 +237,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides input stream.
*
* @param location
* @return input stream
* @throws IOException
@ -254,6 +254,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Creates properties for given location.
*
* @param location
* @return properties for given location
* @throws MojoExecutionException
@ -278,6 +279,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides escape characters.
*
* @param escapeChars
* @return escape characters
*/
@ -293,6 +295,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides real token.
*
* @param token
* @return real token
*/
@ -310,6 +313,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Returns content.
*
* @param comment
* @param properties
* @param escapeTokens
@ -332,13 +336,15 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Writes properties to given file.
*
* @param file
* @param comment
* @param properties
* @param escapeTokens
* @throws MojoExecutionException
*/
protected void writeProperties(
File file, String comment, Properties properties, List<String> escapeTokens)
throws MojoExecutionException {
try {
String content = getContent(comment, properties, escapeTokens);
@ -350,12 +356,13 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Escapes characters.
*
* @param s
* @param escapeChars
* @return
*/
protected String escape(String s, List<String> escapeChars) {
String result = s;
for (String escapeChar : escapeChars) {
result = result.replace(escapeChar, getReplacementToken(escapeChar));
}
@ -364,6 +371,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides replacement token.
*
* @param escapeChar
* @return replacement token
*/
@ -380,21 +388,22 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
}
/**
* Returns list from csv.
*
* @param csv
* @return list of values generated from CSV
*/
protected static final List<String> getListFromCSV(String csv) {
if (StringUtils.isBlank(csv)) {
return new ArrayList<String>();
}
List<String> list = new ArrayList<String>();
String[] tokens = StringUtils.split(csv, ",");
for (String token : tokens) {
list.add(token.trim());
}
return list;
}
public void setIncludeSystemProperties(boolean includeSystemProperties) {
this.includeSystemProperties = includeSystemProperties;
@ -420,17 +429,16 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
this.quiet = quiet;
}
/**
* Sets property files for which keys properties should be included.
*
* @param includePropertyKeysFromFiles
*/
public void setIncludePropertyKeysFromFiles(String[] includePropertyKeysFromFiles) {
if (includePropertyKeysFromFiles != null) {
this.includePropertyKeysFromFiles =
Arrays.copyOf(
includePropertyKeysFromFiles, includePropertyKeysFromFiles.length);
}
}
}
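A minimal sketch of the cr/lf/tab escaping documented above (the replacement tokens are assumed from the parameter description, not taken from the plugin's code):

import java.util.List;
import java.util.Map;

public class EscapeSketch {
    private static final Map<String, String> REPLACEMENTS =
            Map.of("\r", "\\r", "\n", "\\n", "\t", "\\t");

    static String escape(String s, List<String> escapeChars) {
        String result = s;
        for (String c : escapeChars) {
            // each escaped character is rewritten as its two-character token
            result = result.replace(c, REPLACEMENTS.getOrDefault(c, c));
        }
        return result;
    }

    public static void main(String[] args) {
        // a value containing a tab and a linefeed is written as \t and \n
        System.out.println(escape("a\tb\nc", List.of("\t", "\n")));
    }
}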

View File

@ -1,16 +1,13 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@ -27,7 +24,7 @@ public class GenerateOoziePropertiesMojoTest {
mojo.execute();
// assert
assertNull(System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
}
@Test
@ -96,5 +93,4 @@ public class GenerateOoziePropertiesMojoTest {
// assert
assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
}
}

View File

@ -1,5 +1,12 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.lenient;
import java.io.*;
import java.util.Properties;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject;
import org.junit.jupiter.api.*;
@ -9,23 +16,11 @@ import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.mockito.junit.jupiter.MockitoExtension;
/** @author mhorst, claudio.atzori */
@ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest {
@Mock private MavenProject mavenProject;
private WritePredefinedProjectProperties mojo;
@ -86,7 +81,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -110,7 +106,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -134,7 +131,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -150,7 +148,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.store(new FileWriter(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute
mojo.execute();
@ -164,7 +163,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -175,7 +175,8 @@ public class WritePredefinedProjectPropertiesTest {
projectProperties.setProperty(includedKey, includedValue);
doReturn(projectProperties).when(mavenProject).getProperties();
mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
mojo.setIncludePropertyKeysFromFiles(
new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
// execute
mojo.execute();
@ -207,7 +208,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -223,7 +225,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute
mojo.execute();
@ -237,7 +240,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -253,7 +257,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.store(new FileOutputStream(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
@ -296,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
Properties storedProperties = getStoredProperties(testFolder);
assertTrue(storedProperties.size() > 0);
for (Object currentKey : storedProperties.keySet()) {
assertTrue(((String) currentKey).startsWith(PROPERTY_PREFIX_ENV));
}
}
@ -320,7 +325,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
throws Exception {
// given
String key = "systemPropertyKey ";
String value = "systemPropertyValue";
@ -347,7 +353,8 @@ public class WritePredefinedProjectPropertiesTest {
return new File(testFolder, "test.properties");
}
private Properties getStoredProperties(File testFolder)
throws FileNotFoundException, IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
return properties;

View File

@ -3,47 +3,45 @@ package eu.dnetlib.collector.worker.model;
import java.util.HashMap;
import java.util.Map;
public class ApiDescriptor {
private String id;
private String baseUrl;
private String protocol;
private Map<String, String> params = new HashMap<>();
public String getBaseUrl() {
return baseUrl;
}
public void setBaseUrl(final String baseUrl) {
this.baseUrl = baseUrl;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public Map<String, String> getParams() {
return params;
}
public void setParams(final HashMap<String, String> params) {
this.params = params;
}
public String getProtocol() {
return protocol;
}
public void setProtocol(final String protocol) {
this.protocol = protocol;
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.UUID;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -12,108 +11,107 @@ import javax.persistence.Table;
@Table(name = "mdstores")
public class MDStore implements Serializable {
/** */
private static final long serialVersionUID = 3160530489149700055L;
@Id
@Column(name = "id")
private String id;
@Column(name = "format")
private String format;
@Column(name = "layout")
private String layout;
@Column(name = "interpretation")
private String interpretation;
@Column(name = "datasource_name")
private String datasourceName;
@Column(name = "datasource_id")
private String datasourceId;
@Column(name = "api_id")
private String apiId;
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getFormat() {
return format;
}
public void setFormat(final String format) {
this.format = format;
}
public String getLayout() {
return layout;
}
public void setLayout(final String layout) {
this.layout = layout;
}
public String getInterpretation() {
return interpretation;
}
public void setInterpretation(final String interpretation) {
this.interpretation = interpretation;
}
public String getDatasourceName() {
return datasourceName;
}
public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName;
}
public String getDatasourceId() {
return datasourceId;
}
public void setDatasourceId(final String datasourceId) {
this.datasourceId = datasourceId;
}
public String getApiId() {
return apiId;
}
public void setApiId(final String apiId) {
this.apiId = apiId;
}
public static MDStore newInstance(
final String format, final String layout, final String interpretation) {
return newInstance(format, layout, interpretation, null, null, null);
}
public static MDStore newInstance(
final String format,
final String layout,
final String interpretation,
final String dsName,
final String dsId,
final String apiId) {
final MDStore md = new MDStore();
md.setId("md-" + UUID.randomUUID());
md.setFormat(format);
md.setLayout(layout);
md.setInterpretation(interpretation);
md.setDatasourceName(dsName);
md.setDatasourceId(dsId);
md.setApiId(apiId);
return md;
}
}
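For context, a hypothetical use of the factory methods above (values invented):

public class MDStoreExample {
    public static void main(String[] args) {
        MDStore md = MDStore.newInstance("oai_dc", "store", "cleaned");
        System.out.println(md.getId()); // "md-" followed by a random UUID
    }
}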

View File

@ -1,7 +1,6 @@
package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -11,42 +10,40 @@ import javax.persistence.Table;
@Table(name = "mdstore_current_versions")
public class MDStoreCurrentVersion implements Serializable {
/** */
private static final long serialVersionUID = -4757725888593745773L;
@Id
@Column(name = "mdstore")
private String mdstore;
@Column(name = "current_version")
private String currentVersion;
public String getMdstore() {
return mdstore;
}
public void setMdstore(final String mdstore) {
this.mdstore = mdstore;
}
public String getCurrentVersion() {
return currentVersion;
}
public void setCurrentVersion(final String currentVersion) {
this.currentVersion = currentVersion;
}
public static MDStoreCurrentVersion newInstance(final String mdId, final String versionId) {
final MDStoreCurrentVersion cv = new MDStoreCurrentVersion();
cv.setMdstore(mdId);
cv.setCurrentVersion(versionId);
return cv;
}
public static MDStoreCurrentVersion newInstance(final MDStoreVersion v) {
return newInstance(v.getMdstore(), v.getId());
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -14,88 +13,85 @@ import javax.persistence.TemporalType;
@Table(name = "mdstore_versions")
public class MDStoreVersion implements Serializable {
/** */
private static final long serialVersionUID = -4763494442274298339L;
@Id
@Column(name = "id")
private String id;
@Column(name = "mdstore")
private String mdstore;
@Column(name = "writing")
private boolean writing;
@Column(name = "readcount")
private int readCount = 0;
@Column(name = "lastupdate")
@Temporal(TemporalType.TIMESTAMP)
private Date lastUpdate;
@Column(name = "size")
private long size = 0;
public static MDStoreVersion newInstance(final String mdId, final boolean writing) {
final MDStoreVersion t = new MDStoreVersion();
t.setId(mdId + "-" + new Date().getTime());
t.setMdstore(mdId);
t.setLastUpdate(null);
t.setWriting(writing);
t.setReadCount(0);
t.setSize(0);
return t;
}
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getMdstore() {
return mdstore;
}
public void setMdstore(final String mdstore) {
this.mdstore = mdstore;
}
public boolean isWriting() {
return writing;
}
public void setWriting(final boolean writing) {
this.writing = writing;
}
public int getReadCount() {
return readCount;
}
public void setReadCount(final int readCount) {
this.readCount = readCount;
}
public Date getLastUpdate() {
return lastUpdate;
}
public void setLastUpdate(final Date lastUpdate) {
this.lastUpdate = lastUpdate;
}
public long getSize() {
return size;
}
public void setSize(final long size) {
this.size = size;
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -14,132 +13,129 @@ import javax.persistence.TemporalType;
@Table(name = "mdstores_with_info")
public class MDStoreWithInfo implements Serializable {
/** */
private static final long serialVersionUID = -8445784770687571492L;
@Id
@Column(name = "id")
private String id;
@Column(name = "format")
private String format;
@Column(name = "layout")
private String layout;
@Column(name = "interpretation")
private String interpretation;
@Column(name = "datasource_name")
private String datasourceName;
@Column(name = "datasource_id")
private String datasourceId;
@Column(name = "api_id")
private String apiId;
@Column(name = "current_version")
private String currentVersion;
@Column(name = "lastupdate")
@Temporal(TemporalType.TIMESTAMP)
private Date lastUpdate;
@Column(name = "size")
private long size = 0;
@Column(name = "n_versions")
private long numberOfVersions = 0;
public String getId() {
return id;
}
public void setId(final String id) {
this.id = id;
}
public String getFormat() {
return format;
}
public void setFormat(final String format) {
this.format = format;
}
public String getLayout() {
return layout;
}
public void setLayout(final String layout) {
this.layout = layout;
}
public String getInterpretation() {
return interpretation;
}
public void setInterpretation(final String interpretation) {
this.interpretation = interpretation;
}
public String getDatasourceName() {
return datasourceName;
}
public void setDatasourceName(final String datasourceName) {
this.datasourceName = datasourceName;
}
public String getDatasourceId() {
return datasourceId;
}
public void setDatasourceId(final String datasourceId) {
this.datasourceId = datasourceId;
}
public String getApiId() {
return apiId;
}
public void setApiId(final String apiId) {
this.apiId = apiId;
}
public String getCurrentVersion() {
return currentVersion;
}
public void setCurrentVersion(final String currentVersion) {
this.currentVersion = currentVersion;
}
public Date getLastUpdate() {
return lastUpdate;
}
public void setLastUpdate(final Date lastUpdate) {
this.lastUpdate = lastUpdate;
}
public long getSize() {
return size;
}
public void setSize(final long size) {
this.size = size;
}
public long getNumberOfVersions() {
return numberOfVersions;
}
public void setNumberOfVersions(final long numberOfVersions) {
this.numberOfVersions = numberOfVersions;
}
}

View File

@ -1,10 +1,6 @@
package eu.dnetlib.dhp.application;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Serializable;
@ -12,7 +8,9 @@ import java.io.StringWriter;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.Inflater;
import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
public class ArgumentApplicationParser implements Serializable {
@ -23,7 +21,8 @@ public class ArgumentApplicationParser implements Serializable {
public ArgumentApplicationParser(final String json_configuration) throws Exception {
final ObjectMapper mapper = new ObjectMapper();
final OptionsParameter[] configuration =
mapper.readValue(json_configuration, OptionsParameter[].class);
createOptionMap(configuration);
}
@ -33,23 +32,26 @@ public class ArgumentApplicationParser implements Serializable {
private void createOptionMap(final OptionsParameter[] configuration) {
Arrays.stream(configuration)
.map(
conf -> {
final Option o =
new Option(
conf.getParamName(), true, conf.getParamDescription());
o.setLongOpt(conf.getParamLongName());
o.setRequired(conf.isParamRequired());
if (conf.isCompressed()) {
compressedValues.add(conf.getParamLongName());
}
return o;
})
.forEach(options::addOption);
// HelpFormatter formatter = new HelpFormatter();
// formatter.printHelp("myapp", null, options, null, true);
}
public static String decompressValue(final String abstractCompressed) {
try {
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
@ -63,7 +65,7 @@ public class ArgumentApplicationParser implements Serializable {
}
}
public static String compressArgument(final String value) throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(value.getBytes());
@ -74,7 +76,14 @@ public class ArgumentApplicationParser implements Serializable {
public void parseArgument(final String[] args) throws Exception {
CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse(options, args);
Arrays.stream(cmd.getOptions())
.forEach(
it ->
objectMap.put(
it.getLongOpt(),
compressedValues.contains(it.getLongOpt())
? decompressValue(it.getValue())
: it.getValue()));
}
public String get(final String key) {

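A hypothetical usage sketch of the parser (the JSON option definition and argument names are invented for illustration):

public class ParserUsageSketch {
    public static void main(String[] args) throws Exception {
        String json =
                "[{\"paramName\":\"i\",\"paramLongName\":\"input\","
                        + "\"paramDescription\":\"input path\",\"paramRequired\":true}]";
        ArgumentApplicationParser parser = new ArgumentApplicationParser(json);
        parser.parseArgument(new String[] {"-i", "/tmp/input"});
        // values are looked up by the long option name
        System.out.println(parser.get("input")); // prints /tmp/input
    }
}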
View File

@ -1,6 +1,5 @@
package eu.dnetlib.dhp.application;
public class OptionsParameter {
private String paramName;
@ -9,8 +8,7 @@ public class OptionsParameter {
private boolean paramRequired;
private boolean compressed;
public OptionsParameter() {}
public String getParamName() {
return paramName;

View File

@ -3,23 +3,19 @@ package eu.dnetlib.dhp.common;
import java.io.Serializable;
import java.util.function.Supplier;
/** Provides serializable and throwing extensions to standard functional interfaces. */
public class FunctionalInterfaceSupport {
private FunctionalInterfaceSupport() {}
/**
* Serializable supplier of any kind of objects. To be used within spark processing pipelines
* when supplying functions externally.
*
* @param <T>
*/
@FunctionalInterface
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {}
/**
* Extension of consumer accepting functions throwing an exception.
@ -52,5 +48,4 @@ public class FunctionalInterfaceSupport {
public interface ThrowingRunnable<E extends Exception> {
void run() throws E;
}
}
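A small sketch of the serializable-lambda pattern these interfaces enable (example values invented):

import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;

public class SerializableSupplierSketch {
    public static void main(String[] args) {
        // because SerializableSupplier extends Serializable, this lambda can be
        // shipped to Spark executors
        SerializableSupplier<String> supplier = () -> "externally supplied value";
        System.out.println(supplier.get());
    }
}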

View File

@ -1,5 +1,10 @@
package eu.dnetlib.dhp.common;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@ -7,51 +12,60 @@ import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** HDFS utility methods. */
public class HdfsSupport {
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
private HdfsSupport() {}
/**
* Checks a path (file or dir) exists on HDFS.
*
* @param path Path to be checked
* @param configuration Configuration of hadoop env
*/
public static boolean exists(String path, Configuration configuration) {
logger.info("Removing path: {}", path);
return rethrowAsRuntimeException(
() -> {
Path f = new Path(path);
FileSystem fileSystem = FileSystem.get(configuration);
return fileSystem.exists(f);
});
}
/**
* Removes a path (file or dir) from HDFS.
*
* @param path Path to be removed
* @param configuration Configuration of hadoop env
*/
public static void remove(String path, Configuration configuration) {
logger.info("Removing path: {}", path);
rethrowAsRuntimeException(
() -> {
Path f = new Path(path);
FileSystem fileSystem = FileSystem.get(configuration);
if (fileSystem.exists(f)) {
fileSystem.delete(f, true);
}
});
}
/**
* Lists hadoop files located below path or alternatively lists subdirs under path.
*
* @param path Path to be listed for hadoop files
* @param configuration Configuration of hadoop env
* @return List with string locations of hadoop files
*/
public static List<String> listFiles(String path, Configuration configuration) {
logger.info("Listing files in path: {}", path);
return rethrowAsRuntimeException(
() ->
Arrays.stream(FileSystem.get(configuration).listStatus(new Path(path)))
.filter(FileStatus::isDirectory)
.map(x -> x.getPath().toString())
.collect(Collectors.toList()));
}
}
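A hypothetical caller of the utilities above (paths invented; assumes a reachable Hadoop configuration):

import org.apache.hadoop.conf.Configuration;

public class HdfsSupportSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        if (HdfsSupport.exists("/tmp/example_dir", conf)) {
            HdfsSupport.remove("/tmp/example_dir", conf);
        }
        // note: listFiles filters on FileStatus::isDirectory, i.e. it returns subdir locations
        HdfsSupport.listFiles("/tmp", conf).forEach(System.out::println);
    }
}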

View File

@ -1,61 +1,71 @@
package eu.dnetlib.dhp.common;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
import java.util.Objects;
import java.util.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
/** SparkSession utility methods. */
public class SparkSessionSupport {
private SparkSessionSupport() {}
/**
* Runs a given function using SparkSession created using default builder and supplied
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
* created externally.
*
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkSession(
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
runWithSparkSession(
c -> SparkSession.builder().config(c).getOrCreate(),
conf,
isSparkSessionManaged,
fn);
}
/**
* Runs a given function using SparkSession created with hive support and using default builder
* and supplied SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse
* SparkSession created externally.
*
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkHiveSession(
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
runWithSparkSession(
c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(),
conf,
isSparkSessionManaged,
fn);
}
/**
* Runs a given function using SparkSession created using supplied builder and supplied
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
* created externally.
*
* @param sparkSessionBuilder Builder of SparkSession
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkSession(
Function<SparkConf, SparkSession> sparkSessionBuilder,
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
SparkSession spark = null;
try {
spark = sparkSessionBuilder.apply(conf);

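A hypothetical invocation of runWithSparkSession (app name and body invented):

import org.apache.spark.SparkConf;

public class SparkSessionSupportSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("sketch").setMaster("local[*]");
        // isSparkSessionManaged = true, so the session is stopped after fn returns
        SparkSessionSupport.runWithSparkSession(
                conf, true, spark -> System.out.println(spark.version()));
    }
}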
View File

@ -3,18 +3,15 @@ package eu.dnetlib.dhp.common;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingRunnable;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingSupplier;
/** Exception handling utility methods. */
public class ThrowingSupport {
private ThrowingSupport() {}
/**
* Executes given runnable and rethrows any exceptions as RuntimeException.
*
* @param fn Runnable to be executed
* @param <E> Type of exception thrown
*/
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn) {
@ -28,11 +25,12 @@ public class ThrowingSupport {
/**
* Executes given runnable and rethrows any exceptions as RuntimeException with custom message.
*
* @param fn Runnable to be executed
* @param msg Message to be set for rethrown exception
* @param <E> Type of exception thrown
*/
public static <E extends Exception> void rethrowAsRuntimeException(
ThrowingRunnable<E> fn, String msg) {
try {
fn.run();
} catch (Exception e) {
@ -43,7 +41,7 @@ public class ThrowingSupport {
/**
* Executes given supplier and rethrows any exceptions as RuntimeException.
*
* @param fn Supplier to be executed
* @param <T> Type of returned value
* @param <E> Type of exception thrown
* @return Result of supplier execution
@ -59,18 +57,18 @@ public class ThrowingSupport {
/**
* Executes given supplier and rethrows any exceptions as RuntimeException with custom message.
*
* @param fn Supplier to be executed
* @param fn Supplier to be executed
* @param msg Message to be set for rethrown exception
* @param <T> Type of returned value
* @param <E> Type of exception thrown
* @return Result of supplier execution
*/
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn, String msg) {
public static <T, E extends Exception> T rethrowAsRuntimeException(
ThrowingSupplier<T, E> fn, String msg) {
try {
return fn.get();
} catch (Exception e) {
throw new RuntimeException(msg, e);
}
}
}

View File

@ -1,66 +1,52 @@
package eu.dnetlib.dhp.model.mdstore;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.Serializable;
/** This class models a record inside the new Metadata store collection on HDFS. */
public class MetadataRecord implements Serializable {
/** The D-Net Identifier associated to the record */
private String id;
/** The original Identifier of the record */
private String originalId;
/** The encoding of the record, should be JSON or XML */
private String encoding;
/**
* The information about the provenance of the record see @{@link Provenance} for the model of
* this information
*/
private Provenance provenance;
/** The content of the metadata */
private String body;
/** the date when the record has been stored */
private long dateOfCollection;
/** the date when the record has been transformed */
private long dateOfTransformation;
public MetadataRecord() {
this.dateOfCollection = System.currentTimeMillis();
}
public MetadataRecord(
String originalId,
String encoding,
Provenance provenance,
String body,
long dateOfCollection) {
this.originalId = originalId;
this.encoding = encoding;
this.provenance = provenance;
this.body = body;
this.dateOfCollection = dateOfCollection;
this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
}
public String getId() {
@ -71,7 +57,6 @@ public class MetadataRecord implements Serializable {
this.id = id;
}
public String getOriginalId() {
return originalId;
}
@ -96,7 +81,6 @@ public class MetadataRecord implements Serializable {
this.provenance = provenance;
}
public String getBody() {
return body;
}
@ -127,7 +111,6 @@ public class MetadataRecord implements Serializable {
return false;
}
return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
}
@Override

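For illustration, how the D-Net identifier is derived when a record is built (values invented):

public class MetadataRecordSketch {
    public static void main(String[] args) {
        Provenance provenance = new Provenance("ds-id", "Example datasource", "ns_prefix");
        MetadataRecord record =
                new MetadataRecord(
                        "oai:123", "XML", provenance, "<xml/>", System.currentTimeMillis());
        // id == "ns_prefix::" + md5("oai:123"), per DHPUtils.generateIdentifier
        System.out.println(record.getId());
    }
}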
View File

@ -2,27 +2,20 @@ package eu.dnetlib.dhp.model.mdstore;
import java.io.Serializable;
/**
* @author Sandro La Bruzzo
*
* <p>Provenance class models the provenance of the record in the metadataStore. It contains the
* identifier and the name of the datasource that gives the record.
*/
public class Provenance implements Serializable {
private String datasourceId;
private String datasourceName;
private String nsPrefix;
public Provenance() {}
public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
this.datasourceId = datasourceId;

View File

@ -1,20 +1,17 @@
package eu.dnetlib.dhp.parser.utility;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDNav;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** Created by sandro on 9/29/16. */
public class VtdUtilityParser {
public static List<Node> getTextValuesWithAttributes(
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
throws VtdException {
final List<Node> results = new ArrayList<>();
try {
@ -35,25 +32,28 @@ public class VtdUtilityParser {
}
}
private static Map<String, String> getAttributes(
final VTDNav vn, final List<String> attributes) {
final Map<String, String> currentAttributes = new HashMap<>();
if (attributes != null) {
attributes.forEach(attributeKey -> {
try {
int attr = vn.getAttrVal(attributeKey);
if (attr > -1) {
currentAttributes.put(attributeKey, vn.toNormalizedString(attr));
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
});
attributes.forEach(
attributeKey -> {
try {
int attr = vn.getAttrVal(attributeKey);
if (attr > -1) {
currentAttributes.put(attributeKey, vn.toNormalizedString(attr));
}
} catch (Throwable e) {
throw new RuntimeException(e);
}
});
}
return currentAttributes;
}
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws VtdException {
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath)
throws VtdException {
List<String> results = new ArrayList<>();
try {
ap.selectXPath(xpath);
@@ -67,13 +67,13 @@ public class VtdUtilityParser {
}
}
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws VtdException {
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath)
throws VtdException {
try {
ap.selectXPath(xpath);
while (ap.evalXPath() != -1) {
int it = nav.getText();
if (it > -1)
return nav.toNormalizedString(it);
if (it > -1) return nav.toNormalizedString(it);
}
return null;
} catch (Exception e) {
@@ -103,5 +103,4 @@ public class VtdUtilityParser {
this.attributes = attributes;
}
}
}
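A minimal usage sketch for the helpers above, assuming a well-formed XML string xml is in scope and the usual imports (com.ximpleware.VTDGen, java.nio.charset.StandardCharsets, java.util.Arrays); the XPath expressions are illustrative and exception handling is omitted:

    VTDGen vg = new VTDGen();
    vg.setDoc(xml.getBytes(StandardCharsets.UTF_8));
    vg.parse(false); // parse without namespace awareness
    VTDNav vn = vg.getNav();
    AutoPilot ap = new AutoPilot(vn);
    // first text value matching the expression
    String id = VtdUtilityParser.getSingleValue(ap, vn, "//identifier");
    // text values plus the selected attributes, returned as VtdUtilityParser.Node instances
    List<VtdUtilityParser.Node> creators =
            VtdUtilityParser.getTextValuesWithAttributes(
                    ap, vn, "//creator", Arrays.asList("rank"));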

View File

@@ -1,18 +1,16 @@
package eu.dnetlib.dhp.utils;
import com.jayway.jsonpath.JsonPath;
import net.minidev.json.JSONArray;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import net.minidev.json.JSONArray;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
public class DHPUtils {
@@ -28,41 +26,40 @@ public class DHPUtils {
}
public static String generateIdentifier(final String originalId, final String nsPrefix) {
return String.format("%s::%s",nsPrefix, DHPUtils.md5(originalId));
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
}
public static String compressString(final String input ) {
try ( ByteArrayOutputStream out = new ByteArrayOutputStream(); Base64OutputStream b64os = new Base64OutputStream(out)) {
public static String compressString(final String input) {
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
Base64OutputStream b64os = new Base64OutputStream(out)) {
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
gzip.write(input.getBytes(StandardCharsets.UTF_8));
gzip.close();
return out.toString();
} catch (Throwable e ) {
} catch (Throwable e) {
return null;
}
}
public static String decompressString(final String input) {
byte[] byteArray = Base64.decodeBase64(input.getBytes());
int len;
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
byte[] buffer = new byte[1024];
while((len = gis.read(buffer)) != -1){
while ((len = gis.read(buffer)) != -1) {
bos.write(buffer, 0, len);
}
return bos.toString();
} catch (Exception e) {
return null;
}
}
public static String getJPathString(final String jsonPath, final String json) {
try {
Object o = JsonPath.read(json, jsonPath);
if (o instanceof String)
return (String) o;
if (o instanceof String) return (String) o;
if (o instanceof JSONArray && ((JSONArray) o).size() > 0)
return (String) ((JSONArray) o).get(0);
return o.toString();
@@ -70,5 +67,4 @@ public class DHPUtils {
return "";
}
}
}
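The two string helpers above form a gzip-over-base64 round trip, and both deliberately swallow failures by returning null; generateIdentifier combines the namespace prefix with the md5 of the original identifier. A quick sketch (the inputs are placeholders):

    String compressed = DHPUtils.compressString("{\"id\":\"1\"}"); // gzip, then base64
    String restored = DHPUtils.decompressString(compressed);       // back to the original JSON
    String id = DHPUtils.generateIdentifier("oai:example.org:123", "ns0");
    // id has the form "ns0::<md5 of the original id>"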

View File

@@ -9,10 +9,13 @@ import net.sf.saxon.trans.XPathException;
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
public static String DEFAULT_SAXON_EXT_NS_URI =
"http://www.d-net.research-infrastructures.eu/saxon-extension";
public abstract String getName();
public abstract Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException;
public abstract Sequence doCall(XPathContext context, Sequence[] arguments)
throws XPathException;
@Override
public StructuredQName getFunctionQName() {
@@ -28,5 +31,4 @@ public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinit
}
};
}
}

View File

@@ -1,5 +1,9 @@
package eu.dnetlib.dhp.utils.saxon;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.Sequence;
@@ -7,14 +11,9 @@ import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
public class ExtractYear extends AbstractExtensionFunction {
private static final String[] dateFormats = { "yyyy-MM-dd", "yyyy/MM/dd" };
private static final String[] dateFormats = {"yyyy-MM-dd", "yyyy/MM/dd"};
@Override
public String getName() {
@@ -45,7 +44,7 @@ public class ExtractYear extends AbstractExtensionFunction {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
}
@Override
@@ -60,7 +59,8 @@ public class ExtractYear extends AbstractExtensionFunction {
c.setTime(new SimpleDateFormat(format).parse(s));
String year = String.valueOf(c.get(Calendar.YEAR));
return year;
} catch (ParseException e) {}
} catch (ParseException e) {
}
}
return "";
}

View File

@@ -1,18 +1,19 @@
package eu.dnetlib.dhp.utils.saxon;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
public class NormalizeDate extends AbstractExtensionFunction {
private static final String[] normalizeDateFormats = { "yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy" };
private static final String[] normalizeDateFormats = {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
@@ -42,7 +43,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
}
@Override
@@ -58,9 +59,9 @@ public class NormalizeDate extends AbstractExtensionFunction {
Date parse = new SimpleDateFormat(format).parse(date);
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
return res;
} catch (ParseException e) {}
} catch (ParseException e) {
}
}
return "";
}
}

View File

@@ -24,7 +24,8 @@ public class PickFirst extends AbstractExtensionFunction {
final String s1 = getValue(arguments[0]);
final String s2 = getValue(arguments[1]);
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
return new StringValue(
StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
}
private String getValue(final Sequence arg) throws XPathException {
@@ -49,12 +50,11 @@ public class PickFirst extends AbstractExtensionFunction {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
}
@Override
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
return SequenceType.SINGLE_STRING;
}
}

View File

@@ -1,17 +1,17 @@
package eu.dnetlib.dhp.utils.saxon;
import net.sf.saxon.Configuration;
import net.sf.saxon.TransformerFactoryImpl;
import java.io.StringReader;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamSource;
import java.io.StringReader;
import net.sf.saxon.Configuration;
import net.sf.saxon.TransformerFactoryImpl;
public class SaxonTransformerFactory {
/**
* Creates the index record transformer from the given XSLT
*
* @param xslt the XSLT stylesheet, as a string
* @return the transformer created from the given stylesheet
* @throws TransformerException
@@ -26,5 +26,4 @@ public class SaxonTransformerFactory {
return factory.newTransformer(new StreamSource(new StringReader(xslt)));
}
}
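A usage sketch, assuming the factory method shown above is named createInstance and that it registers the extension functions (ExtractYear, NormalizeDate, PickFirst) on its Saxon Configuration; xslt and inputRecord are placeholder strings, StringWriter/StringReader come from java.io and StreamResult from javax.xml.transform.stream, and TransformerException handling is omitted:

    Transformer transformer = SaxonTransformerFactory.createInstance(xslt);
    StringWriter output = new StringWriter();
    transformer.transform(
            new StreamSource(new StringReader(inputRecord)), new StreamResult(output));
    String transformed = output.toString();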

View File

@@ -2,7 +2,6 @@ package eu.dnetlib.message;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.Map;
@@ -16,20 +15,12 @@ public class Message {
private Map<String, String> body;
public static Message fromJson(final String json) throws IOException {
final ObjectMapper jsonMapper = new ObjectMapper();
return jsonMapper.readValue(json, Message.class);
}
public Message() {
}
public Message() {}
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
this.workflowId = workflowId;

View File

@@ -4,7 +4,6 @@ import com.rabbitmq.client.AMQP;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.DefaultConsumer;
import com.rabbitmq.client.Envelope;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.LinkedBlockingQueue;
@@ -13,7 +12,6 @@ public class MessageConsumer extends DefaultConsumer {
final LinkedBlockingQueue<Message> queueMessages;
/**
* Constructs a new instance and records its association to the passed-in channel.
*
@@ -25,19 +23,20 @@ public class MessageConsumer extends DefaultConsumer {
this.queueMessages = queueMessages;
}
@Override
public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body) throws IOException {
public void handleDelivery(
String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
throws IOException {
final String json = new String(body, StandardCharsets.UTF_8);
Message message = Message.fromJson(json);
try {
this.queueMessages.put(message);
System.out.println("Receiving Message "+message);
System.out.println("Receiving Message " + message);
} catch (InterruptedException e) {
if (message.getType()== MessageType.REPORT)
if (message.getType() == MessageType.REPORT)
throw new RuntimeException("Error on sending message");
else {
//TODO LOGGING EXCEPTION
// TODO LOGGING EXCEPTION
}
} finally {
getChannel().basicAck(envelope.getDeliveryTag(), false);

View File

@@ -3,8 +3,6 @@ package eu.dnetlib.message;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;
import sun.rmi.runtime.Log;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@@ -21,23 +19,32 @@ public class MessageManager {
private Connection connection;
private Map<String , Channel> channels = new HashMap<>();
private Map<String, Channel> channels = new HashMap<>();
private boolean durable;
private boolean durable;
private boolean autodelete;
private boolean autodelete;
final private LinkedBlockingQueue<Message> queueMessages;
private final LinkedBlockingQueue<Message> queueMessages;
public MessageManager(String messageHost, String username, String password, final LinkedBlockingQueue<Message> queueMessages) {
public MessageManager(
String messageHost,
String username,
String password,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages;
this.messageHost = messageHost;
this.username = username;
this.password = password;
}
public MessageManager(String messageHost, String username, String password, boolean durable, boolean autodelete, final LinkedBlockingQueue<Message> queueMessages) {
public MessageManager(
String messageHost,
String username,
String password,
boolean durable,
boolean autodelete,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages;
this.messageHost = messageHost;
this.username = username;
@@ -55,7 +62,12 @@ public class MessageManager {
return factory.newConnection();
}
private Channel createChannel(final Connection connection, final String queueName, final boolean durable, final boolean autodelete ) throws Exception {
private Channel createChannel(
final Connection connection,
final String queueName,
final boolean durable,
final boolean autodelete)
throws Exception {
Map<String, Object> args = new HashMap<>();
args.put("x-message-ttl", 10000);
Channel channel = connection.createChannel();
@@ -63,9 +75,10 @@ public class MessageManager {
return channel;
}
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete) throws Exception {
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
throws Exception {
if (channels.containsKey(queueName)) {
return channels.get(queueName);
return channels.get(queueName);
}
if (this.connection == null) {
@@ -75,16 +88,16 @@ public class MessageManager {
return channels.get(queueName);
}
public void close() throws IOException {
channels.values().forEach(ch-> {
try {
ch.close();
} catch (Exception e) {
//TODO LOG
}
});
channels.values()
.forEach(
ch -> {
try {
ch.close();
} catch (Exception e) {
// TODO LOG
}
});
this.connection.close();
}
@@ -92,26 +105,30 @@ public class MessageManager {
public boolean sendMessage(final Message message, String queueName) throws Exception {
try {
Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete);
channel.basicPublish("", queueName,null, message.toString().getBytes());
channel.basicPublish("", queueName, null, message.toString().getBytes());
return true;
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
public boolean sendMessage(final Message message, String queueName, boolean durable_var, boolean autodelete_var) throws Exception {
public boolean sendMessage(
final Message message, String queueName, boolean durable_var, boolean autodelete_var)
throws Exception {
try {
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
channel.basicPublish("", queueName,null, message.toString().getBytes());
channel.basicPublish("", queueName, null, message.toString().getBytes());
return true;
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
public void startConsumingMessage(final String queueName, final boolean durable, final boolean autodelete) throws Exception{
public void startConsumingMessage(
final String queueName, final boolean durable, final boolean autodelete)
throws Exception {
Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
channel.basicConsume(queueName, false, new MessageConsumer(channel,queueMessages));
channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
}
}
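A minimal sketch of the send path (host, credentials and queue name are placeholders): the manager lazily opens the connection, caches one channel per queue (declared with a 10-second x-message-ttl, as shown above), and publishes the JSON form of the message:

    LinkedBlockingQueue<Message> incoming = new LinkedBlockingQueue<>();
    MessageManager manager =
            new MessageManager("rabbitmq.example.org", "user", "secret", incoming);
    Map<String, String> body = new HashMap<>();
    body.put("parsedItem", "300");
    manager.sendMessage(
            new Message("wfId", "Collection", MessageType.ONGOING, body), "dhp.status");
    manager.close();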

View File

@@ -1,8 +1,6 @@
package eu.dnetlib.message;
public enum MessageType {
ONGOING,
REPORT
}

View File

@@ -2,7 +2,7 @@ package eu.dnetlib.scholexplorer.relation;
import java.io.Serializable;
public class RelInfo implements Serializable {
public class RelInfo implements Serializable {
private String original;
private String inverse;

View File

@@ -1,19 +1,18 @@
package eu.dnetlib.scholexplorer.relation;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import java.io.Serializable;
import java.util.HashMap;
import org.apache.commons.io.IOUtils;
public class RelationMapper extends HashMap<String,RelInfo > implements Serializable {
public class RelationMapper extends HashMap<String, RelInfo> implements Serializable {
public static RelationMapper load() throws Exception {
final String json = IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
final String json =
IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(json, RelationMapper.class);
}
}
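A usage sketch; the lookup key below is hypothetical, since the actual keys come from the bundled relations.json, and the RelInfo getters are assumed from the fields shown earlier:

    RelationMapper mapper = RelationMapper.load();
    RelInfo rel = mapper.get("cites"); // hypothetical key from relations.json
    // rel.getOriginal() / rel.getInverse() give the canonical and inverse relation names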

View File

@@ -1,30 +1,46 @@
package eu.dnetlib.dhp.application;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
public class ArgumentApplicationParserTest {
@Test
public void testParseParameter() throws Exception {
final String jsonConfiguration = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
final String jsonConfiguration =
IOUtils.toString(
this.getClass()
.getResourceAsStream("/eu/dnetlib/application/parameters.json"));
assertNotNull(jsonConfiguration);
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(new String[]{"-p", "value0",
"-a", "value1",
"-n", "value2",
"-u", "value3",
"-ru", "value4",
"-rp", "value5",
"-rh", "value6",
"-ro", "value7",
"-rr", "value8",
"-w", "value9",
"-cc", ArgumentApplicationParser.compressArgument(jsonConfiguration)
});
parser.parseArgument(
new String[] {
"-p",
"value0",
"-a",
"value1",
"-n",
"value2",
"-u",
"value3",
"-ru",
"value4",
"-rp",
"value5",
"-rh",
"value6",
"-ro",
"value7",
"-rr",
"value8",
"-w",
"value9",
"-cc",
ArgumentApplicationParser.compressArgument(jsonConfiguration)
});
assertNotNull(parser.get("hdfsPath"));
assertNotNull(parser.get("apidescriptor"));
assertNotNull(parser.get("namenode"));
@@ -47,10 +63,4 @@ public class ArgumentApplicationParserTest {
assertEquals("value9", parser.get("workflowId"));
assertEquals(jsonConfiguration, parser.get("ccCoco"));
}
}

View File

@@ -1,9 +1,6 @@
package eu.dnetlib.dhp.common;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.nio.file.Files;
@@ -11,8 +8,10 @@ import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
public class HdfsSupportTest {
@@ -22,8 +21,8 @@ public class HdfsSupportTest {
@Test
public void shouldThrowARuntimeExceptionOnError() {
// when
assertThrows(RuntimeException.class, () ->
HdfsSupport.remove(null, new Configuration()));
assertThrows(
RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
}
@Test
@@ -54,8 +53,8 @@ public class HdfsSupportTest {
@Test
public void shouldThrowARuntimeExceptionOnError() {
// when
assertThrows(RuntimeException.class, () ->
HdfsSupport.listFiles(null, new Configuration()));
assertThrows(
RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
}
@Test
@@ -68,8 +67,10 @@ public class HdfsSupportTest {
// then
assertEquals(2, paths.size());
List<String> expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()})
.sorted().collect(Collectors.toList());
List<String> expecteds =
Arrays.stream(new String[] {subDir1.toString(), subDir2.toString()})
.sorted()
.collect(Collectors.toList());
List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
assertTrue(actuals.get(0).contains(expecteds.get(0)));
assertTrue(actuals.get(1).contains(expecteds.get(1)));

View File

@@ -1,22 +1,22 @@
package eu.dnetlib.dhp.common;
import static org.mockito.Mockito.*;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
import java.util.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.function.Function;
import static org.mockito.Mockito.*;
public class SparkSessionSupportTest {
@Nested
class RunWithSparkSession {
@Test
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception {
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
throws Exception {
// given
SparkSession spark = mock(SparkSession.class);
SparkConf conf = mock(SparkConf.class);
@@ -34,7 +34,8 @@ public class SparkSessionSupportTest {
}
@Test
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception {
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
throws Exception {
// given
SparkSession spark = mock(SparkSession.class);
SparkConf conf = mock(SparkConf.class);

View File

@@ -1,9 +1,9 @@
package eu.dnetlib.dhp.model.mdstore;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
public class MetadataRecordTest {
@Test

View File

@@ -1,12 +1,11 @@
package eu.dnetlib.message;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test;
public class MessageTest {
@@ -16,7 +15,7 @@ public class MessageTest {
m.setWorkflowId("wId");
m.setType(MessageType.ONGOING);
m.setJobName("Collection");
Map<String,String> body= new HashMap<>();
Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300");
body.put("ExecutionTime", "30s");
@@ -28,28 +27,26 @@ public class MessageTest {
assertEquals(m1.getJobName(), m.getJobName());
assertNotNull(m1.getBody());
m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
m1.getBody()
.keySet()
.forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
assertEquals(m1.getJobName(), m.getJobName());
}
@Test
public void toStringTest() {
final String expectedJson= "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
final String expectedJson =
"{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
Message m = new Message();
m.setWorkflowId("wId");
m.setType(MessageType.ONGOING);
m.setJobName("Collection");
Map<String,String> body= new HashMap<>();
Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300");
body.put("ExecutionTime", "30s");
m.setBody(body);
assertEquals(expectedJson,m.toString());
assertEquals(expectedJson, m.toString());
}
}

View File

@@ -2,14 +2,12 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test;
public class RelationMapperTest {
@Test
public void testLoadRels() throws Exception{
public void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println);
}
}

View File

@@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.action;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.io.Serializable;
@JsonDeserialize(using = AtomicActionDeserializer.class)
@@ -12,8 +11,7 @@ public class AtomicAction<T extends Oaf> implements Serializable {
private T payload;
public AtomicAction() {
}
public AtomicAction() {}
public AtomicAction(Class<T> clazz, T payload) {
this.clazz = clazz;

View File

@@ -7,13 +7,13 @@ import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.io.IOException;
public class AtomicActionDeserializer extends JsonDeserializer {
@Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException, JsonProcessingException {
JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload");

View File

@@ -2,15 +2,19 @@ package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* Actual entity types in the Graph
*/
/** Actual entity types in the Graph */
public enum EntityType {
publication, dataset, otherresearchproduct, software, datasource, organization, project;
publication,
dataset,
otherresearchproduct,
software,
datasource,
organization,
project;
/**
* Resolves the EntityType, given the relative class name
*
* @param clazz the given class name
* @param <T> actual OafEntity subclass
* @return the EntityType associated to the given class
@@ -19,5 +23,4 @@ public enum EntityType {
return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
}
}
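For illustration, assuming the resolver shown above is exposed as fromClass (its signature line falls outside this hunk), resolution is simply the lower-cased simple class name:

    EntityType type = EntityType.fromClass(Publication.class); // EntityType.publication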

View File

@@ -1,9 +1,9 @@
package eu.dnetlib.dhp.schema.common;
/**
* Main entity types in the Graph
*/
/** Main entity types in the Graph */
public enum MainEntityType {
result, datasource, organization, project
result,
datasource,
organization,
project
}

View File

@@ -2,33 +2,29 @@ package eu.dnetlib.dhp.schema.common;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Map;
/**
* Oaf model utility methods.
*/
/** Oaf model utility methods. */
public class ModelSupport {
/**
* Defines the mapping between the actual entity type and the main entity type
*/
/** Defines the mapping between the actual entity type and the main entity type */
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
static {
entityMapping.put(EntityType.publication, MainEntityType.result);
entityMapping.put(EntityType.dataset, MainEntityType.result);
entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
entityMapping.put(EntityType.software, MainEntityType.result);
entityMapping.put(EntityType.datasource, MainEntityType.datasource);
entityMapping.put(EntityType.organization, MainEntityType.organization);
entityMapping.put(EntityType.project, MainEntityType.project);
entityMapping.put(EntityType.publication, MainEntityType.result);
entityMapping.put(EntityType.dataset, MainEntityType.result);
entityMapping.put(EntityType.otherresearchproduct, MainEntityType.result);
entityMapping.put(EntityType.software, MainEntityType.result);
entityMapping.put(EntityType.datasource, MainEntityType.datasource);
entityMapping.put(EntityType.organization, MainEntityType.organization);
entityMapping.put(EntityType.project, MainEntityType.project);
}
/**
* Defines the mapping between the actual entity types and the relative classes implementing them
* Defines the mapping between the actual entity types and the relative classes implementing
* them
*/
public final static Map<EntityType, Class> entityTypes = Maps.newHashMap();
public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
static {
entityTypes.put(EntityType.datasource, Datasource.class);
@@ -40,7 +36,7 @@ public class ModelSupport {
entityTypes.put(EntityType.publication, Publication.class);
}
public final static Map<String, Class> oafTypes = Maps.newHashMap();
public static final Map<String, Class> oafTypes = Maps.newHashMap();
static {
oafTypes.put("datasource", Datasource.class);
@@ -55,19 +51,19 @@ public class ModelSupport {
private static final String schemeTemplate = "dnet:%s_%s_relations";
private ModelSupport() {
}
private ModelSupport() {}
/**
* Checks subclass-superclass relationship.
*
* @param subClazzObject Subclass object instance
* @param subClazzObject Subclass object instance
* @param superClazzObject Superclass object instance
* @param <X> Subclass type
* @param <Y> Superclass type
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Y superClazzObject) {
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Y superClazzObject) {
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
}
@@ -75,25 +71,27 @@ public class ModelSupport {
* Checks subclass-superclass relationship.
*
* @param subClazzObject Subclass object instance
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Class<Y> superClazz) {
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Class<Y> superClazz) {
return isSubClass(subClazzObject.getClass(), superClazz);
}
/**
* Checks subclass-superclass relationship.
*
* @param subClazz Subclass class
* @param subClazz Subclass class
* @param superClazz Superclass class
* @param <X> Subclass type
* @param <Y> Superclass type
* @param <X> Subclass type
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(Class<X> subClazz, Class<Y> superClazz) {
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
Class<X> subClazz, Class<Y> superClazz) {
return superClazz.isAssignableFrom(subClazz);
}
@@ -104,33 +102,33 @@ public class ModelSupport {
* @return
*/
public static <T extends Oaf> Class<T>[] getOafModelClasses() {
return new Class[]{
Author.class,
Context.class,
Country.class,
DataInfo.class,
Dataset.class,
Datasource.class,
ExternalReference.class,
ExtraInfo.class,
Field.class,
GeoLocation.class,
Instance.class,
Journal.class,
KeyValue.class,
Oaf.class,
OafEntity.class,
OAIProvenance.class,
Organization.class,
OriginDescription.class,
OtherResearchProduct.class,
Project.class,
Publication.class,
Qualifier.class,
Relation.class,
Result.class,
Software.class,
StructuredProperty.class
return new Class[] {
Author.class,
Context.class,
Country.class,
DataInfo.class,
Dataset.class,
Datasource.class,
ExternalReference.class,
ExtraInfo.class,
Field.class,
GeoLocation.class,
Instance.class,
Journal.class,
KeyValue.class,
Oaf.class,
OafEntity.class,
OAIProvenance.class,
Organization.class,
OriginDescription.class,
OtherResearchProduct.class,
Project.class,
Publication.class,
Qualifier.class,
Relation.class,
Result.class,
Software.class,
StructuredProperty.class
};
}
@@ -143,9 +141,9 @@ public class ModelSupport {
}
public static String getScheme(final String sourceType, final String targetType) {
return String.format(schemeTemplate,
return String.format(
schemeTemplate,
entityMapping.get(EntityType.valueOf(sourceType)).name(),
entityMapping.get(EntityType.valueOf(targetType)).name());
}
}
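Two concrete consequences of the mappings above, sketched with assumptions made explicit (Publication is assumed to extend Result, as Dataset does): publication and dataset both map to the result main type, so the relation scheme between them collapses to result/result, and isSubClass reduces to plain Java assignability:

    ModelSupport.getScheme("publication", "dataset"); // "dnet:result_result_relations"
    ModelSupport.isSubClass(Publication.class, Result.class); // true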

View File

@@ -71,12 +71,12 @@ public class Author implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Author author = (Author) o;
return Objects.equals(fullname, author.fullname) &&
Objects.equals(name, author.name) &&
Objects.equals(surname, author.surname) &&
Objects.equals(rank, author.rank) &&
Objects.equals(pid, author.pid) &&
Objects.equals(affiliation, author.affiliation);
return Objects.equals(fullname, author.fullname)
&& Objects.equals(name, author.name)
&& Objects.equals(surname, author.surname)
&& Objects.equals(rank, author.rank)
&& Objects.equals(pid, author.pid)
&& Objects.equals(affiliation, author.affiliation);
}
@Override

View File

@@ -26,17 +26,14 @@ public class Context implements Serializable {
@Override
public int hashCode() {
return id ==null? 0 : id.hashCode();
return id == null ? 0 : id.hashCode();
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Context other = (Context) obj;

View File

@@ -1,7 +1,6 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
public class DataInfo implements Serializable {
@@ -13,7 +12,6 @@ public class DataInfo implements Serializable {
private String inferenceprovenance;
private Qualifier provenanceaction;
public Boolean getInvisible() {
return invisible;
}
@@ -67,16 +65,22 @@
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DataInfo dataInfo = (DataInfo) o;
return Objects.equals(invisible, dataInfo.invisible) &&
Objects.equals(inferred, dataInfo.inferred) &&
Objects.equals(deletedbyinference, dataInfo.deletedbyinference) &&
Objects.equals(trust, dataInfo.trust) &&
Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance) &&
Objects.equals(provenanceaction, dataInfo.provenanceaction);
return Objects.equals(invisible, dataInfo.invisible)
&& Objects.equals(inferred, dataInfo.inferred)
&& Objects.equals(deletedbyinference, dataInfo.deletedbyinference)
&& Objects.equals(trust, dataInfo.trust)
&& Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance)
&& Objects.equals(provenanceaction, dataInfo.provenanceaction);
}
@Override
public int hashCode() {
return Objects.hash(invisible, inferred, deletedbyinference, trust, inferenceprovenance, provenanceaction);
return Objects.hash(
invisible,
inferred,
deletedbyinference,
trust,
inferenceprovenance,
provenanceaction);
}
}

View File

@@ -20,7 +20,7 @@ public class Dataset extends Result implements Serializable {
private List<GeoLocation> geolocation;
public Field<String> getStoragedate() {
public Field<String> getStoragedate() {
return storagedate;
}
@@ -80,23 +80,32 @@ public class Dataset extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Dataset.class.isAssignableFrom(e.getClass())){
if (!Dataset.class.isAssignableFrom(e.getClass())) {
return;
}
final Dataset d = (Dataset) e;
storagedate = d.getStoragedate() != null && compareTrust(this, e)<0? d.getStoragedate() : storagedate;
storagedate =
d.getStoragedate() != null && compareTrust(this, e) < 0
? d.getStoragedate()
: storagedate;
device= d.getDevice() != null && compareTrust(this, e)<0? d.getDevice() : device;
device = d.getDevice() != null && compareTrust(this, e) < 0 ? d.getDevice() : device;
size= d.getSize() != null && compareTrust(this, e)<0? d.getSize() : size;
size = d.getSize() != null && compareTrust(this, e) < 0 ? d.getSize() : size;
version= d.getVersion() != null && compareTrust(this, e)<0? d.getVersion() : version;
version = d.getVersion() != null && compareTrust(this, e) < 0 ? d.getVersion() : version;
lastmetadataupdate= d.getLastmetadataupdate() != null && compareTrust(this, e)<0? d.getLastmetadataupdate() :lastmetadataupdate;
lastmetadataupdate =
d.getLastmetadataupdate() != null && compareTrust(this, e) < 0
? d.getLastmetadataupdate()
: lastmetadataupdate;
metadataversionnumber= d.getMetadataversionnumber() != null && compareTrust(this, e)<0? d.getMetadataversionnumber() : metadataversionnumber;
metadataversionnumber =
d.getMetadataversionnumber() != null && compareTrust(this, e) < 0
? d.getMetadataversionnumber()
: metadataversionnumber;
geolocation = mergeLists(geolocation, d.getGeolocation());
@@ -109,17 +118,25 @@ public class Dataset extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Dataset dataset = (Dataset) o;
return Objects.equals(storagedate, dataset.storagedate) &&
Objects.equals(device, dataset.device) &&
Objects.equals(size, dataset.size) &&
Objects.equals(version, dataset.version) &&
Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate) &&
Objects.equals(metadataversionnumber, dataset.metadataversionnumber) &&
Objects.equals(geolocation, dataset.geolocation);
return Objects.equals(storagedate, dataset.storagedate)
&& Objects.equals(device, dataset.device)
&& Objects.equals(size, dataset.size)
&& Objects.equals(version, dataset.version)
&& Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate)
&& Objects.equals(metadataversionnumber, dataset.metadataversionnumber)
&& Objects.equals(geolocation, dataset.geolocation);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), storagedate, device, size, version, lastmetadataupdate, metadataversionnumber, geolocation);
return Objects.hash(
super.hashCode(),
storagedate,
device,
size,
version,
lastmetadataupdate,
metadataversionnumber,
geolocation);
}
}
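The merge rule in mergeFrom is worth spelling out: a scalar field is overwritten only when the incoming record both carries a value and wins the trust comparison (compareTrust(this, e) < 0), while list fields such as geolocation are unioned via mergeLists. A sketch, with the trust values assumed to live in each record's DataInfo:

    Dataset local = new Dataset();    // suppose its DataInfo carries trust "0.5"
    Dataset incoming = new Dataset(); // suppose its DataInfo carries trust "0.9"
    // ... populate both records ...
    local.mergeFrom(incoming);
    // local now holds incoming's non-null scalar fields and the merged geolocation list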

View File

@@ -72,7 +72,7 @@ public class Datasource extends OafEntity implements Serializable {
private Field<String> citationguidelineurl;
//{yes, no, unknown}
// {yes, no, unknown}
private Field<String> qualitymanagementkind;
private Field<String> pidsystems;
@@ -367,65 +367,148 @@ public class Datasource extends OafEntity implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Datasource.class.isAssignableFrom(e.getClass())){
if (!Datasource.class.isAssignableFrom(e.getClass())) {
return;
}
Datasource d = (Datasource)e;
Datasource d = (Datasource) e;
datasourcetype = d.getDatasourcetype() != null && compareTrust(this, e)<0? d.getDatasourcetype() : datasourcetype;
openairecompatibility = d.getOpenairecompatibility() != null && compareTrust(this, e)<0? d.getOpenairecompatibility() : openairecompatibility;
officialname = d.getOfficialname() != null && compareTrust(this, e)<0? d.getOfficialname() : officialname;
englishname = d.getEnglishname() != null && compareTrust(this, e)<0? d.getEnglishname() : officialname;
websiteurl = d.getWebsiteurl() != null && compareTrust(this, e)<0? d.getWebsiteurl() : websiteurl;
logourl = d.getLogourl() != null && compareTrust(this, e)<0? d.getLogourl() : getLogourl();
contactemail = d.getContactemail() != null && compareTrust(this, e)<0? d.getContactemail() : contactemail;
namespaceprefix = d.getNamespaceprefix() != null && compareTrust(this, e)<0? d.getNamespaceprefix() : namespaceprefix;
latitude = d.getLatitude() != null && compareTrust(this, e)<0? d.getLatitude() : latitude;
longitude = d.getLongitude() != null && compareTrust(this, e)<0? d.getLongitude() : longitude;
dateofvalidation = d.getDateofvalidation() != null && compareTrust(this, e)<0? d.getDateofvalidation() : dateofvalidation;
description = d.getDescription() != null && compareTrust(this, e)<0? d.getDescription() : description;
datasourcetype =
d.getDatasourcetype() != null && compareTrust(this, e) < 0
? d.getDatasourcetype()
: datasourcetype;
openairecompatibility =
d.getOpenairecompatibility() != null && compareTrust(this, e) < 0
? d.getOpenairecompatibility()
: openairecompatibility;
officialname =
d.getOfficialname() != null && compareTrust(this, e) < 0
? d.getOfficialname()
: officialname;
englishname =
d.getEnglishname() != null && compareTrust(this, e) < 0
? d.getEnglishname()
: officialname;
websiteurl =
d.getWebsiteurl() != null && compareTrust(this, e) < 0
? d.getWebsiteurl()
: websiteurl;
logourl =
d.getLogourl() != null && compareTrust(this, e) < 0 ? d.getLogourl() : getLogourl();
contactemail =
d.getContactemail() != null && compareTrust(this, e) < 0
? d.getContactemail()
: contactemail;
namespaceprefix =
d.getNamespaceprefix() != null && compareTrust(this, e) < 0
? d.getNamespaceprefix()
: namespaceprefix;
latitude =
d.getLatitude() != null && compareTrust(this, e) < 0 ? d.getLatitude() : latitude;
longitude =
d.getLongitude() != null && compareTrust(this, e) < 0
? d.getLongitude()
: longitude;
dateofvalidation =
d.getDateofvalidation() != null && compareTrust(this, e) < 0
? d.getDateofvalidation()
: dateofvalidation;
description =
d.getDescription() != null && compareTrust(this, e) < 0
? d.getDescription()
: description;
subjects = mergeLists(subjects, d.getSubjects());
// opendoar specific fields (od*)
odnumberofitems = d.getOdnumberofitems() != null && compareTrust(this, e)<0? d.getOdnumberofitems() : odnumberofitems;
odnumberofitemsdate = d.getOdnumberofitemsdate() != null && compareTrust(this, e)<0? d.getOdnumberofitemsdate() : odnumberofitemsdate;
odpolicies = d.getOdpolicies() != null && compareTrust(this, e)<0? d.getOdpolicies() : odpolicies;
odnumberofitems =
d.getOdnumberofitems() != null && compareTrust(this, e) < 0
? d.getOdnumberofitems()
: odnumberofitems;
odnumberofitemsdate =
d.getOdnumberofitemsdate() != null && compareTrust(this, e) < 0
? d.getOdnumberofitemsdate()
: odnumberofitemsdate;
odpolicies =
d.getOdpolicies() != null && compareTrust(this, e) < 0
? d.getOdpolicies()
: odpolicies;
odlanguages = mergeLists(odlanguages, d.getOdlanguages());
odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes());
accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage());
// re3data fields
releasestartdate = d.getReleasestartdate() != null && compareTrust(this, e)<0? d.getReleasestartdate() : releasestartdate;
releaseenddate = d.getReleaseenddate() != null && compareTrust(this, e)<0? d.getReleaseenddate() : releaseenddate;
missionstatementurl = d.getMissionstatementurl() != null && compareTrust(this, e)<0? d.getMissionstatementurl() : missionstatementurl;
dataprovider = d.getDataprovider() != null && compareTrust(this, e)<0? d.getDataprovider() : dataprovider;
serviceprovider = d.getServiceprovider() != null && compareTrust(this, e)<0? d.getServiceprovider() : serviceprovider;
releasestartdate =
d.getReleasestartdate() != null && compareTrust(this, e) < 0
? d.getReleasestartdate()
: releasestartdate;
releaseenddate =
d.getReleaseenddate() != null && compareTrust(this, e) < 0
? d.getReleaseenddate()
: releaseenddate;
missionstatementurl =
d.getMissionstatementurl() != null && compareTrust(this, e) < 0
? d.getMissionstatementurl()
: missionstatementurl;
dataprovider =
d.getDataprovider() != null && compareTrust(this, e) < 0
? d.getDataprovider()
: dataprovider;
serviceprovider =
d.getServiceprovider() != null && compareTrust(this, e) < 0
? d.getServiceprovider()
: serviceprovider;
// {open, restricted or closed}
databaseaccesstype = d.getDatabaseaccesstype() != null && compareTrust(this, e)<0? d.getDatabaseaccesstype() : databaseaccesstype;
databaseaccesstype =
d.getDatabaseaccesstype() != null && compareTrust(this, e) < 0
? d.getDatabaseaccesstype()
: databaseaccesstype;
// {open, restricted or closed}
datauploadtype = d.getDatauploadtype() != null && compareTrust(this, e)<0? d.getDatauploadtype() : datauploadtype;
datauploadtype =
d.getDatauploadtype() != null && compareTrust(this, e) < 0
? d.getDatauploadtype()
: datauploadtype;
// {feeRequired, registration, other}
databaseaccessrestriction = d.getDatabaseaccessrestriction() != null && compareTrust(this, e)<0? d.getDatabaseaccessrestriction() : databaseaccessrestriction;
databaseaccessrestriction =
d.getDatabaseaccessrestriction() != null && compareTrust(this, e) < 0
? d.getDatabaseaccessrestriction()
: databaseaccessrestriction;
// {feeRequired, registration, other}
datauploadrestriction = d.getDatauploadrestriction() != null && compareTrust(this, e)<0? d.getDatauploadrestriction() : datauploadrestriction;
datauploadrestriction =
d.getDatauploadrestriction() != null && compareTrust(this, e) < 0
? d.getDatauploadrestriction()
: datauploadrestriction;
versioning = d.getVersioning() != null && compareTrust(this, e)<0? d.getVersioning() : versioning;
citationguidelineurl = d.getCitationguidelineurl() != null && compareTrust(this, e)<0? d.getCitationguidelineurl() : citationguidelineurl;
versioning =
d.getVersioning() != null && compareTrust(this, e) < 0
? d.getVersioning()
: versioning;
citationguidelineurl =
d.getCitationguidelineurl() != null && compareTrust(this, e) < 0
? d.getCitationguidelineurl()
: citationguidelineurl;
//{yes, no, unknown}
qualitymanagementkind = d.getQualitymanagementkind() != null && compareTrust(this, e)<0? d.getQualitymanagementkind() : qualitymanagementkind;
pidsystems = d.getPidsystems() != null && compareTrust(this, e)<0? d.getPidsystems() : pidsystems;
// {yes, no, unknown}
qualitymanagementkind =
d.getQualitymanagementkind() != null && compareTrust(this, e) < 0
? d.getQualitymanagementkind()
: qualitymanagementkind;
pidsystems =
d.getPidsystems() != null && compareTrust(this, e) < 0
? d.getPidsystems()
: pidsystems;
certificates = d.getCertificates() != null && compareTrust(this, e)<0? d.getCertificates() : certificates;
certificates =
d.getCertificates() != null && compareTrust(this, e) < 0
? d.getCertificates()
: certificates;
policies = mergeLists(policies, d.getPolicies());
journal = d.getJournal() != null && compareTrust(this, e)<0? d.getJournal() : journal;
journal = d.getJournal() != null && compareTrust(this, e) < 0 ? d.getJournal() : journal;
mergeOAFDataInfo(e);
}
@@ -436,45 +519,81 @@ public class Datasource extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Datasource that = (Datasource) o;
return Objects.equals(datasourcetype, that.datasourcetype) &&
Objects.equals(openairecompatibility, that.openairecompatibility) &&
Objects.equals(officialname, that.officialname) &&
Objects.equals(englishname, that.englishname) &&
Objects.equals(websiteurl, that.websiteurl) &&
Objects.equals(logourl, that.logourl) &&
Objects.equals(contactemail, that.contactemail) &&
Objects.equals(namespaceprefix, that.namespaceprefix) &&
Objects.equals(latitude, that.latitude) &&
Objects.equals(longitude, that.longitude) &&
Objects.equals(dateofvalidation, that.dateofvalidation) &&
Objects.equals(description, that.description) &&
Objects.equals(subjects, that.subjects) &&
Objects.equals(odnumberofitems, that.odnumberofitems) &&
Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate) &&
Objects.equals(odpolicies, that.odpolicies) &&
Objects.equals(odlanguages, that.odlanguages) &&
Objects.equals(odcontenttypes, that.odcontenttypes) &&
Objects.equals(accessinfopackage, that.accessinfopackage) &&
Objects.equals(releasestartdate, that.releasestartdate) &&
Objects.equals(releaseenddate, that.releaseenddate) &&
Objects.equals(missionstatementurl, that.missionstatementurl) &&
Objects.equals(dataprovider, that.dataprovider) &&
Objects.equals(serviceprovider, that.serviceprovider) &&
Objects.equals(databaseaccesstype, that.databaseaccesstype) &&
Objects.equals(datauploadtype, that.datauploadtype) &&
Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction) &&
Objects.equals(datauploadrestriction, that.datauploadrestriction) &&
Objects.equals(versioning, that.versioning) &&
Objects.equals(citationguidelineurl, that.citationguidelineurl) &&
Objects.equals(qualitymanagementkind, that.qualitymanagementkind) &&
Objects.equals(pidsystems, that.pidsystems) &&
Objects.equals(certificates, that.certificates) &&
Objects.equals(policies, that.policies) &&
Objects.equals(journal, that.journal);
return Objects.equals(datasourcetype, that.datasourcetype)
&& Objects.equals(openairecompatibility, that.openairecompatibility)
&& Objects.equals(officialname, that.officialname)
&& Objects.equals(englishname, that.englishname)
&& Objects.equals(websiteurl, that.websiteurl)
&& Objects.equals(logourl, that.logourl)
&& Objects.equals(contactemail, that.contactemail)
&& Objects.equals(namespaceprefix, that.namespaceprefix)
&& Objects.equals(latitude, that.latitude)
&& Objects.equals(longitude, that.longitude)
&& Objects.equals(dateofvalidation, that.dateofvalidation)
&& Objects.equals(description, that.description)
&& Objects.equals(subjects, that.subjects)
&& Objects.equals(odnumberofitems, that.odnumberofitems)
&& Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate)
&& Objects.equals(odpolicies, that.odpolicies)
&& Objects.equals(odlanguages, that.odlanguages)
&& Objects.equals(odcontenttypes, that.odcontenttypes)
&& Objects.equals(accessinfopackage, that.accessinfopackage)
&& Objects.equals(releasestartdate, that.releasestartdate)
&& Objects.equals(releaseenddate, that.releaseenddate)
&& Objects.equals(missionstatementurl, that.missionstatementurl)
&& Objects.equals(dataprovider, that.dataprovider)
&& Objects.equals(serviceprovider, that.serviceprovider)
&& Objects.equals(databaseaccesstype, that.databaseaccesstype)
&& Objects.equals(datauploadtype, that.datauploadtype)
&& Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction)
&& Objects.equals(datauploadrestriction, that.datauploadrestriction)
&& Objects.equals(versioning, that.versioning)
&& Objects.equals(citationguidelineurl, that.citationguidelineurl)
&& Objects.equals(qualitymanagementkind, that.qualitymanagementkind)
&& Objects.equals(pidsystems, that.pidsystems)
&& Objects.equals(certificates, that.certificates)
&& Objects.equals(policies, that.policies)
&& Objects.equals(journal, that.journal);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), datasourcetype, openairecompatibility, officialname, englishname, websiteurl, logourl, contactemail, namespaceprefix, latitude, longitude, dateofvalidation, description, subjects, odnumberofitems, odnumberofitemsdate, odpolicies, odlanguages, odcontenttypes, accessinfopackage, releasestartdate, releaseenddate, missionstatementurl, dataprovider, serviceprovider, databaseaccesstype, datauploadtype, databaseaccessrestriction, datauploadrestriction, versioning, citationguidelineurl, qualitymanagementkind, pidsystems, certificates, policies, journal);
return Objects.hash(
super.hashCode(),
datasourcetype,
openairecompatibility,
officialname,
englishname,
websiteurl,
logourl,
contactemail,
namespaceprefix,
latitude,
longitude,
dateofvalidation,
description,
subjects,
odnumberofitems,
odnumberofitemsdate,
odpolicies,
odlanguages,
odcontenttypes,
accessinfopackage,
releasestartdate,
releaseenddate,
missionstatementurl,
dataprovider,
serviceprovider,
databaseaccesstype,
datauploadtype,
databaseaccessrestriction,
datauploadrestriction,
versioning,
citationguidelineurl,
qualitymanagementkind,
pidsystems,
certificates,
policies,
journal);
}
}

View File

@@ -97,18 +97,19 @@ public class ExternalReference implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ExternalReference that = (ExternalReference) o;
return Objects.equals(sitename, that.sitename) &&
Objects.equals(label, that.label) &&
Objects.equals(url, that.url) &&
Objects.equals(description, that.description) &&
Objects.equals(qualifier, that.qualifier) &&
Objects.equals(refidentifier, that.refidentifier) &&
Objects.equals(query, that.query) &&
Objects.equals(dataInfo, that.dataInfo);
return Objects.equals(sitename, that.sitename)
&& Objects.equals(label, that.label)
&& Objects.equals(url, that.url)
&& Objects.equals(description, that.description)
&& Objects.equals(qualifier, that.qualifier)
&& Objects.equals(refidentifier, that.refidentifier)
&& Objects.equals(query, that.query)
&& Objects.equals(dataInfo, that.dataInfo);
}
@Override
public int hashCode() {
return Objects.hash(sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
return Objects.hash(
sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
}
}

View File

@@ -60,11 +60,11 @@ public class ExtraInfo implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ExtraInfo extraInfo = (ExtraInfo) o;
return Objects.equals(name, extraInfo.name) &&
Objects.equals(typology, extraInfo.typology) &&
Objects.equals(provenance, extraInfo.provenance) &&
Objects.equals(trust, extraInfo.trust) &&
Objects.equals(value, extraInfo.value);
return Objects.equals(name, extraInfo.name)
&& Objects.equals(typology, extraInfo.typology)
&& Objects.equals(provenance, extraInfo.provenance)
&& Objects.equals(trust, extraInfo.trust)
&& Objects.equals(value, extraInfo.value);
}
@Override

View File

@@ -31,12 +31,9 @@ public class Field<T> implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Field<T> other = (Field<T>) obj;
return getValue().equals(other.getValue());
}

View File

@@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class GeoLocation implements Serializable {
@@ -39,13 +38,17 @@ public class GeoLocation implements Serializable {
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(point) &&
StringUtils.isBlank(box) &&
StringUtils.isBlank(place);
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
}
public String toComparableString() {
return isBlank()?"":String.format("%s::%s%s", point != null ? point.toLowerCase() : "", box != null ? box.toLowerCase() : "", place != null ? place.toLowerCase() : "");
return isBlank()
? ""
: String.format(
"%s::%s%s",
point != null ? point.toLowerCase() : "",
box != null ? box.toLowerCase() : "",
place != null ? place.toLowerCase() : "");
}
@Override
@@ -55,16 +58,12 @@
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
GeoLocation other = (GeoLocation) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}
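Note the design choice above: equals delegates to toComparableString, so two locations compare equal whenever their lower-cased point/box/place projection matches. A sketch, assuming the plain setters implied by the fields:

    GeoLocation a = new GeoLocation();
    a.setPoint("45.07,7.68");
    GeoLocation b = new GeoLocation();
    b.setPoint("45.07,7.68");
    a.equals(b); // true: comparison goes through the normalized string form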

View File

@@ -16,19 +16,21 @@ public class Instance implements Serializable {
private List<String> url;
// other research products specific
private String distributionlocation;
private String distributionlocation;
private KeyValue collectedfrom;
private Field<String> dateofacceptance;
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed results
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed
// results
private Field<String> processingchargeamount;
// currency - alphabetic code described in ISO-4217. Defined here to cope with possible wrongly typed results
// currency - alphabetic code described in ISO-4217. Defined here to cope with possible wrongly
// typed results
private Field<String> processingchargecurrency;
private Field<String> refereed; //peer-review status
private Field<String> refereed; // peer-review status
public Field<String> getLicense() {
return license;
@@ -118,12 +120,19 @@
this.refereed = refereed;
}
public String toComparableString(){
return String.format("%s::%s::%s::%s",
hostedby != null && hostedby.getKey()!= null ? hostedby.getKey().toLowerCase() : "",
accessright!= null && accessright.getClassid()!= null ? accessright.getClassid() : "",
instancetype!= null && instancetype.getClassid()!= null ? instancetype.getClassid() : "",
url != null ? url:"");
public String toComparableString() {
return String.format(
"%s::%s::%s::%s",
hostedby != null && hostedby.getKey() != null
? hostedby.getKey().toLowerCase()
: "",
accessright != null && accessright.getClassid() != null
? accessright.getClassid()
: "",
instancetype != null && instancetype.getClassid() != null
? instancetype.getClassid()
: "",
url != null ? url : "");
}
@Override
@@ -133,16 +142,12 @@
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Instance other = (Instance) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@@ -130,22 +130,34 @@ public class Journal implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Journal journal = (Journal) o;
return Objects.equals(name, journal.name) &&
Objects.equals(issnPrinted, journal.issnPrinted) &&
Objects.equals(issnOnline, journal.issnOnline) &&
Objects.equals(issnLinking, journal.issnLinking) &&
Objects.equals(ep, journal.ep) &&
Objects.equals(iss, journal.iss) &&
Objects.equals(sp, journal.sp) &&
Objects.equals(vol, journal.vol) &&
Objects.equals(edition, journal.edition) &&
Objects.equals(conferenceplace, journal.conferenceplace) &&
Objects.equals(conferencedate, journal.conferencedate) &&
Objects.equals(dataInfo, journal.dataInfo);
return Objects.equals(name, journal.name)
&& Objects.equals(issnPrinted, journal.issnPrinted)
&& Objects.equals(issnOnline, journal.issnOnline)
&& Objects.equals(issnLinking, journal.issnLinking)
&& Objects.equals(ep, journal.ep)
&& Objects.equals(iss, journal.iss)
&& Objects.equals(sp, journal.sp)
&& Objects.equals(vol, journal.vol)
&& Objects.equals(edition, journal.edition)
&& Objects.equals(conferenceplace, journal.conferenceplace)
&& Objects.equals(conferencedate, journal.conferencedate)
&& Objects.equals(dataInfo, journal.dataInfo);
}
@Override
public int hashCode() {
return Objects.hash(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, conferenceplace, conferencedate, dataInfo);
return Objects.hash(
name,
issnPrinted,
issnOnline,
issnLinking,
ep,
iss,
sp,
vol,
edition,
conferenceplace,
conferencedate,
dataInfo);
}
}

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class KeyValue implements Serializable {
@ -38,7 +37,12 @@ public class KeyValue implements Serializable {
}
public String toComparableString() {
return isBlank()?"":String.format("%s::%s", key != null ? key.toLowerCase() : "", value != null ? value.toLowerCase() : "");
return isBlank()
? ""
: String.format(
"%s::%s",
key != null ? key.toLowerCase() : "",
value != null ? value.toLowerCase() : "");
}
@JsonIgnore
@ -53,12 +57,9 @@ public class KeyValue implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
KeyValue other = (KeyValue) obj;

View File

@ -3,7 +3,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.Objects;
public class OAIProvenance implements Serializable {
public class OAIProvenance implements Serializable {
private OriginDescription originDescription;

View File

@ -1,14 +1,25 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
public abstract class Oaf implements Serializable {
protected List<KeyValue> collectedfrom;
private DataInfo dataInfo;
private Long lastupdatetimestamp;
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public DataInfo getDataInfo() {
return dataInfo;
}
@ -25,24 +36,18 @@ public abstract class Oaf implements Serializable {
this.lastupdatetimestamp = lastupdatetimestamp;
}
public void mergeOAFDataInfo(Oaf e) {
if (e.getDataInfo()!= null && compareTrust(this,e)<0)
dataInfo = e.getDataInfo();
if (e.getDataInfo() != null && compareTrust(this, e) < 0) dataInfo = e.getDataInfo();
}
protected String extractTrust(Oaf e) {
if (e == null || e.getDataInfo()== null || e.getDataInfo().getTrust()== null)
return "0.0";
if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null)
return "0.0";
return e.getDataInfo().getTrust();
}
protected int compareTrust(Oaf a, Oaf b) {
return extractTrust(a).compareTo(extractTrust(b));
}
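
extractTrust and compareTrust above drive every trust-guarded merge in the model. A small sketch of mergeOAFDataInfo's behaviour (DataInfo's setTrust accessor is assumed here, as it is not shown in this diff, and Publication stands in only because Oaf is abstract):

import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Publication;

public class TrustMergeDemo {
    public static void main(String[] args) {
        Publication weak = new Publication();
        DataInfo low = new DataInfo();
        low.setTrust("0.8");
        weak.setDataInfo(low);

        Publication strong = new Publication();
        DataInfo high = new DataInfo();
        high.setTrust("0.9");
        strong.setDataInfo(high);

        // extractTrust falls back to "0.0" for missing values, and the
        // comparison is lexicographic on the trust string, so it matches
        // numeric order only while both values share a "0.x" format.
        weak.mergeOAFDataInfo(strong);
        System.out.println(weak.getDataInfo().getTrust()); // 0.9
    }
}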
@Override
@ -50,8 +55,8 @@ public abstract class Oaf implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Oaf oaf = (Oaf) o;
return Objects.equals(dataInfo, oaf.dataInfo) &&
Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
return Objects.equals(dataInfo, oaf.dataInfo)
&& Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
}
@Override

View File

@ -10,8 +10,6 @@ public abstract class OafEntity extends Oaf implements Serializable {
private List<String> originalId;
private List<KeyValue> collectedfrom;
private List<StructuredProperty> pid;
private String dateofcollection;
@ -38,14 +36,6 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.originalId = originalId;
}
public List<KeyValue> getCollectedfrom() {
return collectedfrom;
}
public void setCollectedfrom(List<KeyValue> collectedfrom) {
this.collectedfrom = collectedfrom;
}
public List<StructuredProperty> getPid() {
return pid;
}
@ -86,11 +76,9 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.oaiprovenance = oaiprovenance;
}
public void mergeFrom(OafEntity e) {
if (e == null)
return;
if (e == null) return;
originalId = mergeLists(originalId, e.getOriginalId());
@ -108,12 +96,15 @@ public abstract class OafEntity extends Oaf implements Serializable {
if (e.getOaiprovenance() != null && compareTrust(this, e) < 0)
oaiprovenance = e.getOaiprovenance();
}
protected <T> List<T> mergeLists(final List<T>... lists) {
return Arrays.stream(lists).filter(Objects::nonNull).flatMap(List::stream).distinct().collect(Collectors.toList());
return Arrays.stream(lists)
.filter(Objects::nonNull)
.flatMap(List::stream)
.distinct()
.collect(Collectors.toList());
}
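
mergeLists is a null-tolerant, order-preserving distinct union; since it is protected, the same pipeline is reproduced below as a standalone sketch:

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

public class MergeListsDemo {
    @SafeVarargs
    static <T> List<T> mergeLists(final List<T>... lists) {
        // Null input lists are skipped entirely; duplicate elements are
        // dropped while first-seen order is kept (Stream.distinct is stable).
        return Arrays.stream(lists)
                .filter(Objects::nonNull)
                .flatMap(List::stream)
                .distinct()
                .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        List<String> a = Arrays.asList("a", "b", "c", "e");
        List<String> b = Arrays.asList("a", "b", "c", "d");
        System.out.println(mergeLists(a, b, null)); // [a, b, c, e, d]
    }
}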
@Override
@ -122,18 +113,27 @@ public abstract class OafEntity extends Oaf implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
OafEntity oafEntity = (OafEntity) o;
return Objects.equals(id, oafEntity.id) &&
Objects.equals(originalId, oafEntity.originalId) &&
Objects.equals(collectedfrom, oafEntity.collectedfrom) &&
Objects.equals(pid, oafEntity.pid) &&
Objects.equals(dateofcollection, oafEntity.dateofcollection) &&
Objects.equals(dateoftransformation, oafEntity.dateoftransformation) &&
Objects.equals(extraInfo, oafEntity.extraInfo) &&
Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
return Objects.equals(id, oafEntity.id)
&& Objects.equals(originalId, oafEntity.originalId)
&& Objects.equals(collectedfrom, oafEntity.collectedfrom)
&& Objects.equals(pid, oafEntity.pid)
&& Objects.equals(dateofcollection, oafEntity.dateofcollection)
&& Objects.equals(dateoftransformation, oafEntity.dateoftransformation)
&& Objects.equals(extraInfo, oafEntity.extraInfo)
&& Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), id, originalId, collectedfrom, pid, dateofcollection, dateoftransformation, extraInfo, oaiprovenance);
return Objects.hash(
super.hashCode(),
id,
originalId,
collectedfrom,
pid,
dateofcollection,
dateoftransformation,
extraInfo,
oaiprovenance);
}
}

View File

@ -122,7 +122,8 @@ public class Organization extends OafEntity implements Serializable {
return ecinternationalorganizationeurinterests;
}
public void setEcinternationalorganizationeurinterests(Field<String> ecinternationalorganizationeurinterests) {
public void setEcinternationalorganizationeurinterests(
Field<String> ecinternationalorganizationeurinterests) {
this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests;
}
@ -166,32 +167,70 @@ public class Organization extends OafEntity implements Serializable {
this.country = country;
}
@Override
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Organization.class.isAssignableFrom(e.getClass())){
if (!Organization.class.isAssignableFrom(e.getClass())) {
return;
}
final Organization o = (Organization) e;
legalshortname = o.getLegalshortname() != null && compareTrust(this, e)<0? o.getLegalshortname() : legalshortname;
legalname = o.getLegalname() != null && compareTrust(this, e)<0 ? o.getLegalname() : legalname;
legalshortname =
o.getLegalshortname() != null && compareTrust(this, e) < 0
? o.getLegalshortname()
: legalshortname;
legalname =
o.getLegalname() != null && compareTrust(this, e) < 0
? o.getLegalname()
: legalname;
alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames);
websiteurl = o.getWebsiteurl() != null && compareTrust(this, e)<0? o.getWebsiteurl() : websiteurl;
logourl = o.getLogourl() != null && compareTrust(this, e)<0? o.getLogourl() : logourl;
eclegalbody = o.getEclegalbody() != null && compareTrust(this, e)<0? o.getEclegalbody() : eclegalbody;
eclegalperson = o.getEclegalperson() != null && compareTrust(this, e)<0? o.getEclegalperson() : eclegalperson;
ecnonprofit = o.getEcnonprofit() != null && compareTrust(this, e)<0? o.getEcnonprofit() : ecnonprofit;
ecresearchorganization = o.getEcresearchorganization() != null && compareTrust(this, e)<0? o.getEcresearchorganization() : ecresearchorganization;
echighereducation = o.getEchighereducation() != null && compareTrust(this, e)<0? o.getEchighereducation() : echighereducation;
ecinternationalorganizationeurinterests = o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e)<0? o.getEcinternationalorganizationeurinterests() : ecinternationalorganizationeurinterests;
ecinternationalorganization = o.getEcinternationalorganization() != null && compareTrust(this, e)<0? o.getEcinternationalorganization() : ecinternationalorganization;
ecenterprise = o.getEcenterprise() != null && compareTrust(this, e)<0? o.getEcenterprise() :ecenterprise;
ecsmevalidated = o.getEcsmevalidated() != null && compareTrust(this, e)<0? o.getEcsmevalidated() :ecsmevalidated;
ecnutscode = o.getEcnutscode() != null && compareTrust(this, e)<0? o.getEcnutscode() :ecnutscode;
country = o.getCountry() != null && compareTrust(this, e)<0 ? o.getCountry() :country;
websiteurl =
o.getWebsiteurl() != null && compareTrust(this, e) < 0
? o.getWebsiteurl()
: websiteurl;
logourl = o.getLogourl() != null && compareTrust(this, e) < 0 ? o.getLogourl() : logourl;
eclegalbody =
o.getEclegalbody() != null && compareTrust(this, e) < 0
? o.getEclegalbody()
: eclegalbody;
eclegalperson =
o.getEclegalperson() != null && compareTrust(this, e) < 0
? o.getEclegalperson()
: eclegalperson;
ecnonprofit =
o.getEcnonprofit() != null && compareTrust(this, e) < 0
? o.getEcnonprofit()
: ecnonprofit;
ecresearchorganization =
o.getEcresearchorganization() != null && compareTrust(this, e) < 0
? o.getEcresearchorganization()
: ecresearchorganization;
echighereducation =
o.getEchighereducation() != null && compareTrust(this, e) < 0
? o.getEchighereducation()
: echighereducation;
ecinternationalorganizationeurinterests =
o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e) < 0
? o.getEcinternationalorganizationeurinterests()
: ecinternationalorganizationeurinterests;
ecinternationalorganization =
o.getEcinternationalorganization() != null && compareTrust(this, e) < 0
? o.getEcinternationalorganization()
: ecinternationalorganization;
ecenterprise =
o.getEcenterprise() != null && compareTrust(this, e) < 0
? o.getEcenterprise()
: ecenterprise;
ecsmevalidated =
o.getEcsmevalidated() != null && compareTrust(this, e) < 0
? o.getEcsmevalidated()
: ecsmevalidated;
ecnutscode =
o.getEcnutscode() != null && compareTrust(this, e) < 0
? o.getEcnutscode()
: ecnutscode;
country = o.getCountry() != null && compareTrust(this, e) < 0 ? o.getCountry() : country;
mergeOAFDataInfo(o);
}
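
Every assignment in mergeFrom above repeats the same guard. A hypothetical helper, not part of this codebase, that names the idiom (compareTrust returns the same value for every field of a given pair, so hoisting it is behaviour-preserving):

// Keep the current value unless the other record supplies a non-null
// candidate and carries strictly higher trust.
private static <T> T preferTrusted(T current, T candidate, boolean otherWins) {
    return candidate != null && otherWins ? candidate : current;
}

// Inside mergeFrom(OafEntity e), after the cast to Organization o:
//   final boolean otherWins = compareTrust(this, e) < 0;
//   legalshortname = preferTrusted(legalshortname, o.getLegalshortname(), otherWins);
//   legalname = preferTrusted(legalname, o.getLegalname(), otherWins);
//   ... and so on for the remaining scalar fields.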
@ -201,26 +240,45 @@ public class Organization extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Organization that = (Organization) o;
return Objects.equals(legalshortname, that.legalshortname) &&
Objects.equals(legalname, that.legalname) &&
Objects.equals(alternativeNames, that.alternativeNames) &&
Objects.equals(websiteurl, that.websiteurl) &&
Objects.equals(logourl, that.logourl) &&
Objects.equals(eclegalbody, that.eclegalbody) &&
Objects.equals(eclegalperson, that.eclegalperson) &&
Objects.equals(ecnonprofit, that.ecnonprofit) &&
Objects.equals(ecresearchorganization, that.ecresearchorganization) &&
Objects.equals(echighereducation, that.echighereducation) &&
Objects.equals(ecinternationalorganizationeurinterests, that.ecinternationalorganizationeurinterests) &&
Objects.equals(ecinternationalorganization, that.ecinternationalorganization) &&
Objects.equals(ecenterprise, that.ecenterprise) &&
Objects.equals(ecsmevalidated, that.ecsmevalidated) &&
Objects.equals(ecnutscode, that.ecnutscode) &&
Objects.equals(country, that.country);
return Objects.equals(legalshortname, that.legalshortname)
&& Objects.equals(legalname, that.legalname)
&& Objects.equals(alternativeNames, that.alternativeNames)
&& Objects.equals(websiteurl, that.websiteurl)
&& Objects.equals(logourl, that.logourl)
&& Objects.equals(eclegalbody, that.eclegalbody)
&& Objects.equals(eclegalperson, that.eclegalperson)
&& Objects.equals(ecnonprofit, that.ecnonprofit)
&& Objects.equals(ecresearchorganization, that.ecresearchorganization)
&& Objects.equals(echighereducation, that.echighereducation)
&& Objects.equals(
ecinternationalorganizationeurinterests,
that.ecinternationalorganizationeurinterests)
&& Objects.equals(ecinternationalorganization, that.ecinternationalorganization)
&& Objects.equals(ecenterprise, that.ecenterprise)
&& Objects.equals(ecsmevalidated, that.ecsmevalidated)
&& Objects.equals(ecnutscode, that.ecnutscode)
&& Objects.equals(country, that.country);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), legalshortname, legalname, alternativeNames, websiteurl, logourl, eclegalbody, eclegalperson, ecnonprofit, ecresearchorganization, echighereducation, ecinternationalorganizationeurinterests, ecinternationalorganization, ecenterprise, ecsmevalidated, ecnutscode, country);
return Objects.hash(
super.hashCode(),
legalshortname,
legalname,
alternativeNames,
websiteurl,
logourl,
eclegalbody,
eclegalperson,
ecnonprofit,
ecresearchorganization,
echighereducation,
ecinternationalorganizationeurinterests,
ecinternationalorganization,
ecenterprise,
ecsmevalidated,
ecnutscode,
country);
}
}

View File

@ -70,16 +70,17 @@ public class OriginDescription implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
OriginDescription that = (OriginDescription) o;
return Objects.equals(harvestDate, that.harvestDate) &&
Objects.equals(altered, that.altered) &&
Objects.equals(baseURL, that.baseURL) &&
Objects.equals(identifier, that.identifier) &&
Objects.equals(datestamp, that.datestamp) &&
Objects.equals(metadataNamespace, that.metadataNamespace);
return Objects.equals(harvestDate, that.harvestDate)
&& Objects.equals(altered, that.altered)
&& Objects.equals(baseURL, that.baseURL)
&& Objects.equals(identifier, that.identifier)
&& Objects.equals(datestamp, that.datestamp)
&& Objects.equals(metadataNamespace, that.metadataNamespace);
}
@Override
public int hashCode() {
return Objects.hash(harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
return Objects.hash(
harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
}
}

View File

@ -40,11 +40,11 @@ public class OtherResearchProduct extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())){
if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())) {
return;
}
OtherResearchProduct o = (OtherResearchProduct)e;
OtherResearchProduct o = (OtherResearchProduct) e;
contactperson = mergeLists(contactperson, o.getContactperson());
contactgroup = mergeLists(contactgroup, o.getContactgroup());
@ -58,9 +58,9 @@ public class OtherResearchProduct extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
OtherResearchProduct that = (OtherResearchProduct) o;
return Objects.equals(contactperson, that.contactperson) &&
Objects.equals(contactgroup, that.contactgroup) &&
Objects.equals(tool, that.tool);
return Objects.equals(contactperson, that.contactperson)
&& Objects.equals(contactgroup, that.contactgroup)
&& Objects.equals(tool, that.tool);
}
@Override

View File

@ -266,44 +266,91 @@ public class Project extends OafEntity implements Serializable {
this.fundedamount = fundedamount;
}
@Override
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Project.class.isAssignableFrom(e.getClass())){
if (!Project.class.isAssignableFrom(e.getClass())) {
return;
}
Project p = (Project)e;
Project p = (Project) e;
websiteurl= p.getWebsiteurl()!= null && compareTrust(this,e)<0?p.getWebsiteurl():websiteurl;
code= p.getCode()!=null && compareTrust(this,e)<0?p.getCode():code;
acronym= p.getAcronym()!= null && compareTrust(this,e)<0?p.getAcronym():acronym;
title= p.getTitle()!= null && compareTrust(this,e)<0?p.getTitle():title;
startdate= p.getStartdate()!=null && compareTrust(this,e)<0?p.getStartdate():startdate;
enddate= p.getEnddate()!=null && compareTrust(this,e)<0?p.getEnddate():enddate;
callidentifier= p.getCallidentifier()!=null && compareTrust(this,e)<0?p.getCallidentifier():callidentifier;
keywords= p.getKeywords()!=null && compareTrust(this,e)<0?p.getKeywords():keywords;
duration= p.getDuration()!=null && compareTrust(this,e)<0?p.getDuration():duration;
ecsc39= p.getEcsc39()!=null && compareTrust(this,e)<0?p.getEcsc39():ecsc39;
oamandatepublications= p.getOamandatepublications()!=null && compareTrust(this,e)<0?p.getOamandatepublications():oamandatepublications;
ecarticle29_3= p.getEcarticle29_3()!=null && compareTrust(this,e)<0?p.getEcarticle29_3():ecarticle29_3;
subjects= mergeLists(subjects, p.getSubjects());
fundingtree= mergeLists(fundingtree, p.getFundingtree());
contracttype= p.getContracttype()!=null && compareTrust(this,e)<0?p.getContracttype():contracttype;
optional1= p.getOptional1()!=null && compareTrust(this,e)<0?p.getOptional1():optional1;
optional2= p.getOptional2()!=null && compareTrust(this,e)<0?p.getOptional2():optional2;
jsonextrainfo= p.getJsonextrainfo()!=null && compareTrust(this,e)<0?p.getJsonextrainfo():jsonextrainfo;
contactfullname= p.getContactfullname()!=null && compareTrust(this,e)<0?p.getContactfullname():contactfullname;
contactfax= p.getContactfax()!=null && compareTrust(this,e)<0?p.getContactfax():contactfax;
contactphone= p.getContactphone()!=null && compareTrust(this,e)<0?p.getContactphone():contactphone;
contactemail= p.getContactemail()!=null && compareTrust(this,e)<0?p.getContactemail():contactemail;
summary= p.getSummary()!=null && compareTrust(this,e)<0?p.getSummary():summary;
currency= p.getCurrency()!=null && compareTrust(this,e)<0?p.getCurrency():currency;
totalcost= p.getTotalcost()!=null && compareTrust(this,e)<0?p.getTotalcost():totalcost;
fundedamount= p.getFundedamount()!= null && compareTrust(this,e)<0?p.getFundedamount():fundedamount;
mergeOAFDataInfo(e);
websiteurl =
p.getWebsiteurl() != null && compareTrust(this, e) < 0
? p.getWebsiteurl()
: websiteurl;
code = p.getCode() != null && compareTrust(this, e) < 0 ? p.getCode() : code;
acronym = p.getAcronym() != null && compareTrust(this, e) < 0 ? p.getAcronym() : acronym;
title = p.getTitle() != null && compareTrust(this, e) < 0 ? p.getTitle() : title;
startdate =
p.getStartdate() != null && compareTrust(this, e) < 0
? p.getStartdate()
: startdate;
enddate = p.getEnddate() != null && compareTrust(this, e) < 0 ? p.getEnddate() : enddate;
callidentifier =
p.getCallidentifier() != null && compareTrust(this, e) < 0
? p.getCallidentifier()
: callidentifier;
keywords =
p.getKeywords() != null && compareTrust(this, e) < 0 ? p.getKeywords() : keywords;
duration =
p.getDuration() != null && compareTrust(this, e) < 0 ? p.getDuration() : duration;
ecsc39 = p.getEcsc39() != null && compareTrust(this, e) < 0 ? p.getEcsc39() : ecsc39;
oamandatepublications =
p.getOamandatepublications() != null && compareTrust(this, e) < 0
? p.getOamandatepublications()
: oamandatepublications;
ecarticle29_3 =
p.getEcarticle29_3() != null && compareTrust(this, e) < 0
? p.getEcarticle29_3()
: ecarticle29_3;
subjects = mergeLists(subjects, p.getSubjects());
fundingtree = mergeLists(fundingtree, p.getFundingtree());
contracttype =
p.getContracttype() != null && compareTrust(this, e) < 0
? p.getContracttype()
: contracttype;
optional1 =
p.getOptional1() != null && compareTrust(this, e) < 0
? p.getOptional1()
: optional1;
optional2 =
p.getOptional2() != null && compareTrust(this, e) < 0
? p.getOptional2()
: optional2;
jsonextrainfo =
p.getJsonextrainfo() != null && compareTrust(this, e) < 0
? p.getJsonextrainfo()
: jsonextrainfo;
contactfullname =
p.getContactfullname() != null && compareTrust(this, e) < 0
? p.getContactfullname()
: contactfullname;
contactfax =
p.getContactfax() != null && compareTrust(this, e) < 0
? p.getContactfax()
: contactfax;
contactphone =
p.getContactphone() != null && compareTrust(this, e) < 0
? p.getContactphone()
: contactphone;
contactemail =
p.getContactemail() != null && compareTrust(this, e) < 0
? p.getContactemail()
: contactemail;
summary = p.getSummary() != null && compareTrust(this, e) < 0 ? p.getSummary() : summary;
currency =
p.getCurrency() != null && compareTrust(this, e) < 0 ? p.getCurrency() : currency;
totalcost =
p.getTotalcost() != null && compareTrust(this, e) < 0
? p.getTotalcost()
: totalcost;
fundedamount =
p.getFundedamount() != null && compareTrust(this, e) < 0
? p.getFundedamount()
: fundedamount;
mergeOAFDataInfo(e);
}
@Override
@ -312,36 +359,63 @@ public class Project extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Project project = (Project) o;
return Objects.equals(websiteurl, project.websiteurl) &&
Objects.equals(code, project.code) &&
Objects.equals(acronym, project.acronym) &&
Objects.equals(title, project.title) &&
Objects.equals(startdate, project.startdate) &&
Objects.equals(enddate, project.enddate) &&
Objects.equals(callidentifier, project.callidentifier) &&
Objects.equals(keywords, project.keywords) &&
Objects.equals(duration, project.duration) &&
Objects.equals(ecsc39, project.ecsc39) &&
Objects.equals(oamandatepublications, project.oamandatepublications) &&
Objects.equals(ecarticle29_3, project.ecarticle29_3) &&
Objects.equals(subjects, project.subjects) &&
Objects.equals(fundingtree, project.fundingtree) &&
Objects.equals(contracttype, project.contracttype) &&
Objects.equals(optional1, project.optional1) &&
Objects.equals(optional2, project.optional2) &&
Objects.equals(jsonextrainfo, project.jsonextrainfo) &&
Objects.equals(contactfullname, project.contactfullname) &&
Objects.equals(contactfax, project.contactfax) &&
Objects.equals(contactphone, project.contactphone) &&
Objects.equals(contactemail, project.contactemail) &&
Objects.equals(summary, project.summary) &&
Objects.equals(currency, project.currency) &&
Objects.equals(totalcost, project.totalcost) &&
Objects.equals(fundedamount, project.fundedamount);
return Objects.equals(websiteurl, project.websiteurl)
&& Objects.equals(code, project.code)
&& Objects.equals(acronym, project.acronym)
&& Objects.equals(title, project.title)
&& Objects.equals(startdate, project.startdate)
&& Objects.equals(enddate, project.enddate)
&& Objects.equals(callidentifier, project.callidentifier)
&& Objects.equals(keywords, project.keywords)
&& Objects.equals(duration, project.duration)
&& Objects.equals(ecsc39, project.ecsc39)
&& Objects.equals(oamandatepublications, project.oamandatepublications)
&& Objects.equals(ecarticle29_3, project.ecarticle29_3)
&& Objects.equals(subjects, project.subjects)
&& Objects.equals(fundingtree, project.fundingtree)
&& Objects.equals(contracttype, project.contracttype)
&& Objects.equals(optional1, project.optional1)
&& Objects.equals(optional2, project.optional2)
&& Objects.equals(jsonextrainfo, project.jsonextrainfo)
&& Objects.equals(contactfullname, project.contactfullname)
&& Objects.equals(contactfax, project.contactfax)
&& Objects.equals(contactphone, project.contactphone)
&& Objects.equals(contactemail, project.contactemail)
&& Objects.equals(summary, project.summary)
&& Objects.equals(currency, project.currency)
&& Objects.equals(totalcost, project.totalcost)
&& Objects.equals(fundedamount, project.fundedamount);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), websiteurl, code, acronym, title, startdate, enddate, callidentifier, keywords, duration, ecsc39, oamandatepublications, ecarticle29_3, subjects, fundingtree, contracttype, optional1, optional2, jsonextrainfo, contactfullname, contactfax, contactphone, contactemail, summary, currency, totalcost, fundedamount);
return Objects.hash(
super.hashCode(),
websiteurl,
code,
acronym,
title,
startdate,
enddate,
callidentifier,
keywords,
duration,
ecsc39,
oamandatepublications,
ecarticle29_3,
subjects,
fundingtree,
contracttype,
optional1,
optional2,
jsonextrainfo,
contactfullname,
contactfax,
contactphone,
contactemail,
summary,
currency,
totalcost,
fundedamount);
}
}

View File

@ -20,14 +20,13 @@ public class Publication extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Publication.class.isAssignableFrom(e.getClass())){
if (!Publication.class.isAssignableFrom(e.getClass())) {
return;
}
Publication p = (Publication) e;
if (p.getJournal() != null && compareTrust(this, e)<0)
journal = p.getJournal();
if (p.getJournal() != null && compareTrust(this, e) < 0) journal = p.getJournal();
mergeOAFDataInfo(e);
}

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class Qualifier implements Serializable {
@ -45,20 +44,24 @@ public class Qualifier implements Serializable {
}
public String toComparableString() {
return isBlank()?"": String.format("%s::%s::%s::%s",
classid != null ? classid : "",
classname != null ? classname : "",
schemeid != null ? schemeid : "",
schemename != null ? schemename : "");
return isBlank()
? ""
: String.format(
"%s::%s::%s::%s",
classid != null ? classid : "",
classname != null ? classname : "",
schemeid != null ? schemeid : "",
schemename != null ? schemename : "");
}
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(classid) &&
StringUtils.isBlank(classname) &&
StringUtils.isBlank(schemeid) &&
StringUtils.isBlank(schemename);
return StringUtils.isBlank(classid)
&& StringUtils.isBlank(classname)
&& StringUtils.isBlank(schemeid)
&& StringUtils.isBlank(schemename);
}
@Override
public int hashCode() {
return toComparableString().hashCode();
@ -66,16 +69,12 @@ public class Qualifier implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Qualifier other = (Qualifier) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@ -1,104 +1,98 @@
package eu.dnetlib.dhp.schema.oaf;
import static com.google.common.base.Preconditions.checkArgument;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.google.common.base.Preconditions.checkArgument;
public class Relation extends Oaf {
private String relType;
private String relType;
private String subRelType;
private String subRelType;
private String relClass;
private String relClass;
private String source;
private String source;
private String target;
private String target;
private List<KeyValue> collectedFrom = new ArrayList<>();
public String getRelType() {
return relType;
}
public String getRelType() {
return relType;
}
public void setRelType(final String relType) {
this.relType = relType;
}
public void setRelType(final String relType) {
this.relType = relType;
}
public String getSubRelType() {
return subRelType;
}
public String getSubRelType() {
return subRelType;
}
public void setSubRelType(final String subRelType) {
this.subRelType = subRelType;
}
public void setSubRelType(final String subRelType) {
this.subRelType = subRelType;
}
public String getRelClass() {
return relClass;
}
public String getRelClass() {
return relClass;
}
public void setRelClass(final String relClass) {
this.relClass = relClass;
}
public void setRelClass(final String relClass) {
this.relClass = relClass;
}
public String getSource() {
return source;
}
public String getSource() {
return source;
}
public void setSource(final String source) {
this.source = source;
}
public void setSource(final String source) {
this.source = source;
}
public String getTarget() {
return target;
}
public String getTarget() {
return target;
}
public void setTarget(final String target) {
this.target = target;
}
public void setTarget(final String target) {
this.target = target;
}
public void mergeFrom(final Relation r) {
public List<KeyValue> getCollectedFrom() {
return collectedFrom;
}
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
checkArgument(Objects.equals(getTarget(), r.getTarget()), "target ids must be equal");
checkArgument(Objects.equals(getRelType(), r.getRelType()), "relType(s) must be equal");
checkArgument(
Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");
public void setCollectedFrom(final List<KeyValue> collectedFrom) {
this.collectedFrom = collectedFrom;
}
setCollectedfrom(
Stream.concat(
Optional.ofNullable(getCollectedfrom())
.map(Collection::stream)
.orElse(Stream.empty()),
Optional.ofNullable(r.getCollectedfrom())
.map(Collection::stream)
.orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals
.collect(Collectors.toList()));
}
public void mergeFrom(final Relation r) {
checkArgument(Objects.equals(getSource(), r.getSource()),"source ids must be equal");
checkArgument(Objects.equals(getTarget(), r.getTarget()),"target ids must be equal");
checkArgument(Objects.equals(getRelType(), r.getRelType()),"relType(s) must be equal");
checkArgument(Objects.equals(getSubRelType(), r.getSubRelType()),"subRelType(s) must be equal");
checkArgument(Objects.equals(getRelClass(), r.getRelClass()),"relClass(es) must be equal");
setCollectedFrom(
Stream
.concat(Optional.ofNullable(getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()),
Optional.ofNullable(r.getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals
.collect(Collectors.toList()));
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Relation relation = (Relation) o;
return relType.equals(relation.relType) &&
subRelType.equals(relation.subRelType) &&
relClass.equals(relation.relClass) &&
source.equals(relation.source) &&
target.equals(relation.target);
}
@Override
public int hashCode() {
return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom);
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Relation relation = (Relation) o;
return relType.equals(relation.relType)
&& subRelType.equals(relation.subRelType)
&& relClass.equals(relation.relClass)
&& source.equals(relation.source)
&& target.equals(relation.target);
}
@Override
public int hashCode() {
return Objects.hash(relType, subRelType, relClass, source, target, collectedfrom);
}
}
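
mergeFrom above only unions provenance between records describing the same edge; any mismatch in the five identity fields trips a checkArgument. A minimal sketch (all field values are made up for illustration):

import eu.dnetlib.dhp.schema.oaf.Relation;

public class RelationMergeDemo {
    public static void main(String[] args) {
        Relation r1 = rel("p1", "p2", "resultResult", "supplement", "isSupplementTo");
        Relation r2 = rel("p1", "p2", "resultResult", "supplement", "isSupplementTo");

        // Same source/target/relType/subRelType/relClass: the merge succeeds
        // and the collectedfrom lists are unioned (deduplicated via
        // KeyValue.equals).
        r1.mergeFrom(r2);

        try {
            r1.mergeFrom(rel("p1", "OTHER", "resultResult", "supplement", "isSupplementTo"));
        } catch (IllegalArgumentException expected) {
            System.out.println(expected.getMessage()); // target ids must be equal
        }
    }

    private static Relation rel(String source, String target, String relType,
            String subRelType, String relClass) {
        Relation r = new Relation();
        r.setSource(source);
        r.setTarget(target);
        r.setRelType(relType);
        r.setSubRelType(subRelType);
        r.setRelClass(relClass);
        return r;
    }
}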

View File

@ -223,7 +223,7 @@ public class Result extends OafEntity implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Result.class.isAssignableFrom(e.getClass())){
if (!Result.class.isAssignableFrom(e.getClass())) {
return;
}
@ -231,11 +231,9 @@ public class Result extends OafEntity implements Serializable {
instance = mergeLists(instance, r.getInstance());
if (r.getResulttype() != null && compareTrust(this, r) < 0)
resulttype = r.getResulttype();
if (r.getResulttype() != null && compareTrust(this, r) < 0) resulttype = r.getResulttype();
if (r.getLanguage() != null && compareTrust(this, r) < 0)
language = r.getLanguage();
if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage();
country = mergeLists(country, r.getCountry());
@ -247,8 +245,7 @@ public class Result extends OafEntity implements Serializable {
description = longestLists(description, r.getDescription());
if (r.getPublisher() != null && compareTrust(this, r) < 0)
publisher = r.getPublisher();
if (r.getPublisher() != null && compareTrust(this, r) < 0) publisher = r.getPublisher();
if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0)
embargoenddate = r.getEmbargoenddate();
@ -261,8 +258,7 @@ public class Result extends OafEntity implements Serializable {
contributor = mergeLists(contributor, r.getContributor());
if (r.getResourcetype() != null)
resourcetype = r.getResourcetype();
if (r.getResourcetype() != null) resourcetype = r.getResourcetype();
coverage = mergeLists(coverage, r.getCoverage());
@ -271,13 +267,21 @@ public class Result extends OafEntity implements Serializable {
externalReference = mergeLists(externalReference, r.getExternalReference());
}
private List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
if (a == null || b == null)
return a == null ? b : a;
if (a == null || b == null) return a == null ? b : a;
if (a.size() == b.size()) {
int msa = a.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0);
int msb = b.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0);
int msa =
a.stream()
.filter(i -> i.getValue() != null)
.map(i -> i.getValue().length())
.max(Comparator.naturalOrder())
.orElse(0);
int msb =
b.stream()
.filter(i -> i.getValue() != null)
.map(i -> i.getValue().length())
.max(Comparator.naturalOrder())
.orElse(0);
return msa > msb ? a : b;
}
return a.size() > b.size() ? a : b;
@ -289,31 +293,53 @@ public class Result extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Result result = (Result) o;
return Objects.equals(author, result.author) &&
Objects.equals(resulttype, result.resulttype) &&
Objects.equals(language, result.language) &&
Objects.equals(country, result.country) &&
Objects.equals(subject, result.subject) &&
Objects.equals(title, result.title) &&
Objects.equals(relevantdate, result.relevantdate) &&
Objects.equals(description, result.description) &&
Objects.equals(dateofacceptance, result.dateofacceptance) &&
Objects.equals(publisher, result.publisher) &&
Objects.equals(embargoenddate, result.embargoenddate) &&
Objects.equals(source, result.source) &&
Objects.equals(fulltext, result.fulltext) &&
Objects.equals(format, result.format) &&
Objects.equals(contributor, result.contributor) &&
Objects.equals(resourcetype, result.resourcetype) &&
Objects.equals(coverage, result.coverage) &&
Objects.equals(bestaccessright, result.bestaccessright) &&
Objects.equals(context, result.context) &&
Objects.equals(externalReference, result.externalReference) &&
Objects.equals(instance, result.instance);
return Objects.equals(author, result.author)
&& Objects.equals(resulttype, result.resulttype)
&& Objects.equals(language, result.language)
&& Objects.equals(country, result.country)
&& Objects.equals(subject, result.subject)
&& Objects.equals(title, result.title)
&& Objects.equals(relevantdate, result.relevantdate)
&& Objects.equals(description, result.description)
&& Objects.equals(dateofacceptance, result.dateofacceptance)
&& Objects.equals(publisher, result.publisher)
&& Objects.equals(embargoenddate, result.embargoenddate)
&& Objects.equals(source, result.source)
&& Objects.equals(fulltext, result.fulltext)
&& Objects.equals(format, result.format)
&& Objects.equals(contributor, result.contributor)
&& Objects.equals(resourcetype, result.resourcetype)
&& Objects.equals(coverage, result.coverage)
&& Objects.equals(bestaccessright, result.bestaccessright)
&& Objects.equals(context, result.context)
&& Objects.equals(externalReference, result.externalReference)
&& Objects.equals(instance, result.instance);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), author, resulttype, language, country, subject, title, relevantdate, description, dateofacceptance, publisher, embargoenddate, source, fulltext, format, contributor, resourcetype, coverage, bestaccessright, context, externalReference, instance);
return Objects.hash(
super.hashCode(),
author,
resulttype,
language,
country,
subject,
title,
relevantdate,
description,
dateofacceptance,
publisher,
embargoenddate,
source,
fulltext,
format,
contributor,
resourcetype,
coverage,
bestaccessright,
context,
externalReference,
instance);
}
}
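
longestLists above is private and operates on Field<String>; the same tie-break logic is sketched below with plain strings for brevity: when the sizes differ the bigger list wins, and on a size tie the list holding the single longest value wins.

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class LongestListsDemo {
    static int longestValue(List<String> xs) {
        return xs.stream()
                .filter(v -> v != null)
                .map(String::length)
                .max(Comparator.naturalOrder())
                .orElse(0);
    }

    static List<String> longestLists(List<String> a, List<String> b) {
        if (a == null || b == null) return a == null ? b : a;
        if (a.size() == b.size()) return longestValue(a) > longestValue(b) ? a : b;
        return a.size() > b.size() ? a : b;
    }

    public static void main(String[] args) {
        List<String> a = Arrays.asList("short", "a much longer description");
        List<String> b = Arrays.asList("tiny", "mid");
        System.out.println(longestLists(a, b)); // ties on size, so a wins
    }
}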

View File

@ -50,7 +50,7 @@ public class Software extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Software.class.isAssignableFrom(e.getClass())){
if (!Software.class.isAssignableFrom(e.getClass())) {
return;
}
@ -59,9 +59,15 @@ public class Software extends Result implements Serializable {
license = mergeLists(license, s.getLicense());
codeRepositoryUrl = s.getCodeRepositoryUrl()!= null && compareTrust(this, s)<0?s.getCodeRepositoryUrl():codeRepositoryUrl;
codeRepositoryUrl =
s.getCodeRepositoryUrl() != null && compareTrust(this, s) < 0
? s.getCodeRepositoryUrl()
: codeRepositoryUrl;
programmingLanguage= s.getProgrammingLanguage()!= null && compareTrust(this, s)<0?s.getProgrammingLanguage():programmingLanguage;
programmingLanguage =
s.getProgrammingLanguage() != null && compareTrust(this, s) < 0
? s.getProgrammingLanguage()
: programmingLanguage;
mergeOAFDataInfo(e);
}
@ -72,14 +78,19 @@ public class Software extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Software software = (Software) o;
return Objects.equals(documentationUrl, software.documentationUrl) &&
Objects.equals(license, software.license) &&
Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl) &&
Objects.equals(programmingLanguage, software.programmingLanguage);
return Objects.equals(documentationUrl, software.documentationUrl)
&& Objects.equals(license, software.license)
&& Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl)
&& Objects.equals(programmingLanguage, software.programmingLanguage);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), documentationUrl, license, codeRepositoryUrl, programmingLanguage);
return Objects.hash(
super.hashCode(),
documentationUrl,
license,
codeRepositoryUrl,
programmingLanguage);
}
}

View File

@ -2,7 +2,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
public class StructuredProperty implements Serializable {
public class StructuredProperty implements Serializable {
private String value;
@ -34,8 +34,8 @@ public class StructuredProperty implements Serializable {
this.dataInfo = dataInfo;
}
public String toComparableString(){
return value != null ? value.toLowerCase() : "";
public String toComparableString() {
return value != null ? value.toLowerCase() : "";
}
@Override
@ -45,16 +45,12 @@ public class StructuredProperty implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
StructuredProperty other = (StructuredProperty) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@ -2,12 +2,11 @@ package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
public class DLIDataset extends Dataset {
@ -47,33 +46,45 @@ public class DLIDataset extends Dataset {
DLIDataset p = (DLIDataset) e;
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
completionStatus = p.completionStatus;
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
}
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null)
a.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
a.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
if (b != null)
b.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
b.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
return new ArrayList<>(result.values());
}
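
The dedup above keeps one ProvenaceInfo per id, and an entry stored as "incomplete" is replaced by any later record carrying a non-blank completion status. A standalone sketch of that rule (slightly simplified: the original also distinguishes blank from null ids):

import eu.dnetlib.dhp.schema.scholexplorer.ProvenaceInfo;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;

public class ProvenanceMergeSketch {
    static List<ProvenaceInfo> mergeProvenance(List<ProvenaceInfo> a, List<ProvenaceInfo> b) {
        Map<String, ProvenaceInfo> result = new HashMap<>();
        accumulate(result, a);
        accumulate(result, b);
        return new ArrayList<>(result.values());
    }

    static void accumulate(Map<String, ProvenaceInfo> result, List<ProvenaceInfo> list) {
        if (list == null) return;
        list.forEach(p -> {
            if (p == null || p.getId() == null) return;
            ProvenaceInfo current = result.get(p.getId());
            // First record for an id always enters the map; afterwards only
            // an upgrade from "incomplete" to a concrete status replaces it.
            if (current == null
                    || ("incomplete".equalsIgnoreCase(current.getCompletionStatus())
                            && StringUtils.isNotBlank(p.getCompletionStatus()))) {
                result.put(p.getId(), p);
            }
        });
    }
}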

View File

@ -2,9 +2,9 @@ package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
public class DLIPublication extends Publication implements Serializable {
@ -44,33 +44,45 @@ public class DLIPublication extends Publication implements Serializable {
DLIPublication p = (DLIPublication) e;
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
completionStatus = p.completionStatus;
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
}
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null)
a.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
a.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
if (b != null)
b.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
b.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
return new ArrayList<>(result.values());
}

View File

@ -1,15 +1,13 @@
package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
public class DLIUnknown extends Oaf implements Serializable {
@ -49,7 +47,6 @@ public class DLIUnknown extends Oaf implements Serializable {
this.id = id;
}
public List<StructuredProperty> getPid() {
return pid;
}
@ -75,33 +72,45 @@ public class DLIUnknown extends Oaf implements Serializable {
}
public void mergeFrom(DLIUnknown p) {
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
}
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null)
a.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
a.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
if (b != null)
b.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
b.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
} else if (p != null && p.getId() != null && !result.containsKey(p.getId()))
result.put(p.getId(), p);
});
return new ArrayList<>(result.values());
}

View File

@ -10,7 +10,7 @@ public class ProvenaceInfo implements Serializable {
private String completionStatus;
private String collectionMode ="collected";
private String collectionMode = "collected";
public String getId() {
return id;

View File

@ -1,18 +1,14 @@
package eu.dnetlib.dhp.schema.action;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*;
/**
* @author claudio.atzori
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Relation;
import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
/** @author claudio.atzori */
public class AtomicActionTest {
@Test
@ -36,7 +32,5 @@ public class AtomicActionTest {
assertEquals(aa1.getClazz(), aa2.getClazz());
assertEquals(aa1.getPayload(), aa2.getPayload());
}
}

View File

@ -1,14 +1,14 @@
package eu.dnetlib.dhp.schema.common;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class ModelSupportTest {
@Nested

View File

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.schema.oaf;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class MergeTest {
@ -19,7 +19,7 @@ public class MergeTest {
@Test
public void mergeListsTest() {
//string list merge test
// string list merge test
List<String> a = Arrays.asList("a", "b", "c", "e");
List<String> b = Arrays.asList("a", "b", "c", "d");
List<String> c = null;
@ -44,7 +44,6 @@ public class MergeTest {
assertNotNull(a.getCollectedfrom());
assertEquals(3, a.getCollectedfrom().size());
}
@Test
@ -60,7 +59,6 @@ public class MergeTest {
assertNotNull(a.getSubject());
assertEquals(3, a.getSubject().size());
}
private KeyValue setKV(final String key, final String value) {
@ -73,7 +71,8 @@ public class MergeTest {
return k;
}
private StructuredProperty setSP(final String value, final String schema, final String classname) {
private StructuredProperty setSP(
final String value, final String schema, final String classname) {
StructuredProperty s = new StructuredProperty();
s.setValue(value);
Qualifier q = new Qualifier();

View File

@ -6,48 +6,44 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import org.junit.jupiter.api.Test;
public class DLItest {
@Test
public void testMergePublication() throws JsonProcessingException {
DLIPublication a1 = new DLIPublication();
a1.setPid(Arrays.asList( createSP("123456","pdb","dnet:pid_types")));
a1.setPid(Arrays.asList(createSP("123456", "pdb", "dnet:pid_types")));
a1.setTitle(Collections.singletonList(createSP("Un Titolo", "title", "dnetTitle")));
a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd","Zenodo","complete")));
a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd", "Zenodo", "complete")));
a1.setCompletionStatus("complete");
DLIPublication a = new DLIPublication();
a.setPid(Arrays.asList(createSP("10.11","doi","dnet:pid_types"), createSP("123456","pdb","dnet:pid_types")));
a.setPid(
Arrays.asList(
createSP("10.11", "doi", "dnet:pid_types"),
createSP("123456", "pdb", "dnet:pid_types")));
a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle")));
a.setDlicollectedfrom(Arrays.asList(createCollectedFrom("dct","datacite","complete"),createCollectedFrom("dct","datacite","incomplete")));
a.setDlicollectedfrom(
Arrays.asList(
createCollectedFrom("dct", "datacite", "complete"),
createCollectedFrom("dct", "datacite", "incomplete")));
a.setCompletionStatus("incomplete");
a.mergeFrom(a1);
ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(a));
}
@Test
public void testDeserialization() throws IOException {
final String json ="{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
final String json =
"{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@ -56,7 +52,8 @@ public class DLItest {
System.out.println(mapper.writeValueAsString(dliDataset));
}
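For context, a minimal sketch of the lenient Jackson setup this test relies on; the DLIDataset target bean and the readValue line are assumptions, since the diff elides the middle of the method:

    import com.fasterxml.jackson.databind.DeserializationFeature;
    import com.fasterxml.jackson.databind.ObjectMapper;

    // Unknown JSON fields are skipped instead of failing deserialization.
    ObjectMapper mapper = new ObjectMapper();
    mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
    DLIDataset dliDataset = mapper.readValue(json, DLIDataset.class); // assumed bean type
    System.out.println(mapper.writeValueAsString(dliDataset));        // round-trip check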
private ProvenaceInfo createCollectedFrom(final String id, final String name, final String completionStatus) {
private ProvenaceInfo createCollectedFrom(
final String id, final String name, final String completionStatus) {
ProvenaceInfo p = new ProvenaceInfo();
p.setId(id);
p.setName(name);
@ -64,8 +61,8 @@ public class DLItest {
return p;
}
private StructuredProperty createSP(final String value, final String className, final String schemeName) {
private StructuredProperty createSP(
final String value, final String className, final String schemeName) {
StructuredProperty p = new StructuredProperty();
p.setValue(value);
Qualifier schema = new Qualifier();
@ -76,6 +73,4 @@ public class DLItest {
p.setQualifier(schema);
return p;
}
}

View File

@ -10,117 +10,130 @@ import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJo
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.stream.Collectors;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ISClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final Logger log =
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private ISLookUpService isLookup;
private ISLookUpService isLookup;
public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
}
public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
}
public List<String> getLatestRawsetPaths(String setIds) {
public List<String> getLatestRawsetPaths(String setIds) {
List<String> ids = Lists.newArrayList(Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
.omitEmptyStrings()
.trimResults()
.split(setIds));
List<String> ids =
Lists.newArrayList(
Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
.omitEmptyStrings()
.trimResults()
.split(setIds));
return ids.stream()
.map(id -> getSet(isLookup, id))
.map(as -> as.getPathToLatest())
.collect(Collectors.toCollection(ArrayList::new));
}
return ids.stream()
.map(id -> getSet(isLookup, id))
.map(as -> as.getPathToLatest())
.collect(Collectors.toCollection(ArrayList::new));
}
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
final String q = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
+ "where $x//SET/@id = '" + setId + "' return $x";
final String q =
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
+ "where $x//SET/@id = '"
+ setId
+ "' return $x";
try {
final String basePath = getBasePathHDFS(isLookup);
final String setProfile = isLookup.getResourceProfileByQuery(q);
return getActionManagerSet(basePath, setProfile);
} catch (ISLookUpException | ActionManagerException e) {
throw new RuntimeException("Error accessing Sets, using query: " + q);
}
}
try {
final String basePath = getBasePathHDFS(isLookup);
final String setProfile = isLookup.getResourceProfileByQuery(q);
return getActionManagerSet(basePath, setProfile);
} catch (ISLookUpException | ActionManagerException e) {
throw new RuntimeException("Error accessing Sets, using query: " + q);
}
}
private ActionManagerSet getActionManagerSet(final String basePath, final String profile) throws ActionManagerException {
final SAXReader reader = new SAXReader();
final ActionManagerSet set = new ActionManagerSet();
private ActionManagerSet getActionManagerSet(final String basePath, final String profile)
throws ActionManagerException {
final SAXReader reader = new SAXReader();
final ActionManagerSet set = new ActionManagerSet();
try {
final Document doc = reader.read(new StringReader(profile));
try {
final Document doc = reader.read(new StringReader(profile));
set.setId(doc.valueOf("//SET/@id").trim());
set.setName(doc.valueOf("//SET").trim());
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim()));
set.setLatest(doc.valueOf("//RAW_SETS/LATEST/@id"), doc.valueOf("//RAW_SETS/LATEST/@creationDate"), doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
set.setDirectory(doc.valueOf("//SET/@directory"));
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
if (expiredNodes != null) {
for (int i = 0; i < expiredNodes.size(); i++) {
Element ex = (Element) expiredNodes.get(i);
set.addExpired(ex.attributeValue("id"), ex.attributeValue("creationDate"), ex.attributeValue("lastUpdate"));
}
}
set.setId(doc.valueOf("//SET/@id").trim());
set.setName(doc.valueOf("//SET").trim());
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim()));
set.setLatest(
doc.valueOf("//RAW_SETS/LATEST/@id"),
doc.valueOf("//RAW_SETS/LATEST/@creationDate"),
doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
set.setDirectory(doc.valueOf("//SET/@directory"));
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
if (expiredNodes != null) {
for (int i = 0; i < expiredNodes.size(); i++) {
Element ex = (Element) expiredNodes.get(i);
set.addExpired(
ex.attributeValue("id"),
ex.attributeValue("creationDate"),
ex.attributeValue("lastUpdate"));
}
}
final StringBuilder sb = new StringBuilder();
sb.append(basePath);
sb.append("/");
sb.append(doc.valueOf("//SET/@directory"));
sb.append("/");
sb.append(doc.valueOf("//RAW_SETS/LATEST/@id"));
set.setPathToLatest(sb.toString());
final StringBuilder sb = new StringBuilder();
sb.append(basePath);
sb.append("/");
sb.append(doc.valueOf("//SET/@directory"));
sb.append("/");
sb.append(doc.valueOf("//RAW_SETS/LATEST/@id"));
set.setPathToLatest(sb.toString());
return set;
} catch (Exception e) {
throw new ActionManagerException("Error creating set from profile: " + profile, e);
}
}
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
return queryServiceProperty(isLookup, "basePath");
}
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName) throws ActionManagerException {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + "']/@value/string()";
log.debug("quering for service property: " + q);
try {
final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value);
} catch (ISLookUpException e) {
String msg = "Error accessing service profile, using query: " + q;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (NoSuchElementException e) {
String msg = "missing service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (IllegalArgumentException e) {
String msg = "found more than one service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
}
}
return set;
} catch (Exception e) {
throw new ActionManagerException("Error creating set from profile: " + profile, e);
}
}
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
return queryServiceProperty(isLookup, "basePath");
}
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
throws ActionManagerException {
final String q =
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName
+ "']/@value/string()";
log.debug("quering for service property: " + q);
try {
final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value);
} catch (ISLookUpException e) {
String msg = "Error accessing service profile, using query: " + q;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (NoSuchElementException e) {
String msg = "missing service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (IllegalArgumentException e) {
String msg = "found more than one service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
}
}
}
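For orientation, a usage sketch of this client; the lookup endpoint URL and the set ids are placeholders:

    import java.util.List;

    // Resolve the HDFS paths of the latest raw sets for two hypothetical
    // action-set ids; ids are comma-separated, matching INPUT_ACTION_SET_ID_SEPARATOR.
    ISClient client = new ISClient("http://localhost:8280/is/services/isLookUp");
    List<String> paths = client.getLatestRawsetPaths("actionset-1,actionset-2");
    paths.forEach(System.out::println);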

View File

@ -1,9 +1,15 @@
package eu.dnetlib.dhp.actionmanager.partition;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.spark.sql.functions.*;
import eu.dnetlib.dhp.actionmanager.ISClient;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
@ -14,32 +20,27 @@ import org.apache.spark.sql.types.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.spark.sql.functions.*;
/**
* Partitions a given set of action sets by payload type.
*/
/** Partitions a given set of action sets by payload type. */
public class PartitionActionSetsByPayloadTypeJob {
private static final Logger logger = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final Logger logger =
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final StructType KV_SCHEMA = StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply("key", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply("value", DataTypes.StringType, false, Metadata.empty())
));
private static final StructType KV_SCHEMA =
StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply(
"key", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"value", DataTypes.StringType, false, Metadata.empty())));
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())
));
private static final StructType ATOMIC_ACTION_SCHEMA =
StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply(
"clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"payload", DataTypes.StringType, false, Metadata.empty())));
private ISClient isClient;
@ -47,20 +48,20 @@ public class PartitionActionSetsByPayloadTypeJob {
this.isClient = new ISClient(isLookupUrl);
}
public PartitionActionSetsByPayloadTypeJob() {
}
public PartitionActionSetsByPayloadTypeJob() {}
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
String jsonConfiguration =
IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
Boolean isSparkSessionManaged =
Optional.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputActionSetIds = parser.get("inputActionSetIds");
@ -72,7 +73,8 @@ public class PartitionActionSetsByPayloadTypeJob {
String isLookupUrl = parser.get("isLookupUrl");
logger.info("isLookupUrl: {}", isLookupUrl);
new PartitionActionSetsByPayloadTypeJob(isLookupUrl).run(isSparkSessionManaged, inputActionSetIds, outputPath);
new PartitionActionSetsByPayloadTypeJob(isLookupUrl)
.run(isSparkSessionManaged, inputActionSetIds, outputPath);
}
protected void run(Boolean isSparkSessionManaged, String inputActionSetIds, String outputPath) {
@ -83,51 +85,51 @@ public class PartitionActionSetsByPayloadTypeJob {
SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
runWithSparkSession(conf, isSparkSessionManaged,
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
readAndWriteActionSetsFromPaths(spark, inputActionSetPaths, outputPath);
});
}
private static void removeOutputDir(SparkSession spark,
String path) {
private static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
private static void readAndWriteActionSetsFromPaths(SparkSession spark,
List<String> inputActionSetPaths,
String outputPath) {
inputActionSetPaths
.forEach(inputActionSetPath -> {
Dataset<Row> actionDS = readActionSetFromPath(spark, inputActionSetPath);
saveActions(actionDS, outputPath);
});
private static void readAndWriteActionSetsFromPaths(
SparkSession spark, List<String> inputActionSetPaths, String outputPath) {
inputActionSetPaths.stream()
.filter(
path ->
HdfsSupport.exists(
path, spark.sparkContext().hadoopConfiguration()))
.forEach(
inputActionSetPath -> {
Dataset<Row> actionDS =
readActionSetFromPath(spark, inputActionSetPath);
saveActions(actionDS, outputPath);
});
}
private static Dataset<Row> readActionSetFromPath(SparkSession spark,
String path) {
private static Dataset<Row> readActionSetFromPath(SparkSession spark, String path) {
logger.info("Reading actions from path: {}", path);
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Row> rdd = sc
.sequenceFile(path, Text.class, Text.class)
.map(x -> RowFactory.create(x._1().toString(), x._2().toString()));
JavaRDD<Row> rdd =
sc.sequenceFile(path, Text.class, Text.class)
.map(x -> RowFactory.create(x._1().toString(), x._2().toString()));
return spark.createDataFrame(rdd, KV_SCHEMA)
.withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.select(expr("atomic_action.*"));
}
private static void saveActions(Dataset<Row> actionDS,
String path) {
private static void saveActions(Dataset<Row> actionDS, String path) {
logger.info("Saving actions to path: {}", path);
actionDS
.write()
.partitionBy("clazz")
.mode(SaveMode.Append)
.parquet(path);
actionDS.write().partitionBy("clazz").mode(SaveMode.Append).parquet(path);
}
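A sketch of driving the job programmatically, mirroring the test further below (run is protected, so this assumes a caller in the same package; the URL and paths are placeholders):

    PartitionActionSetsByPayloadTypeJob job =
            new PartitionActionSetsByPayloadTypeJob("http://localhost:8280/is/services/isLookUp");
    // Resolves the latest raw-set paths for the given ids, then partitions them by payload type.
    job.run(Boolean.TRUE, "actionset-1,actionset-2", "/tmp/action_payloads");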
public ISClient getIsClient() {

View File

@ -1,41 +1,39 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import java.util.function.BiFunction;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
/**
* OAF model merging support.
*/
/** OAF model merging support. */
public class MergeAndGet {
private MergeAndGet() {
}
private MergeAndGet() {}
/**
* Strategy for merging OAF model objects.
* <p>
* MERGE_FROM_AND_GET: use OAF 'mergeFrom' method
* SELECT_NEWER_AND_GET: use last update timestamp to return newer instance
*
* <p>MERGE_FROM_AND_GET: use OAF 'mergeFrom' method SELECT_NEWER_AND_GET: use last update
* timestamp to return newer instance
*/
public enum Strategy {
MERGE_FROM_AND_GET, SELECT_NEWER_AND_GET
MERGE_FROM_AND_GET,
SELECT_NEWER_AND_GET
}
/**
* Returns a function for merging OAF model objects.
*
* @param strategy Strategy to be used to merge objects
* @param <G> Graph table type
* @param <A> Action payload type
* @param <G> Graph table type
* @param <A> Action payload type
* @return BiFunction to be used to merge OAF objects
*/
public static <G extends Oaf, A extends Oaf> SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
public static <G extends Oaf, A extends Oaf>
SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
switch (strategy) {
case MERGE_FROM_AND_GET:
return () -> MergeAndGet::mergeFromAndGet;
@ -49,26 +47,36 @@ public class MergeAndGet {
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
((Relation) x).mergeFrom((Relation) y);
return x;
} else if (isSubClass(x, OafEntity.class) && isSubClass(y, OafEntity.class) && isSubClass(x, y)) {
} else if (isSubClass(x, OafEntity.class)
&& isSubClass(y, OafEntity.class)
&& isSubClass(x, y)) {
((OafEntity) x).mergeFrom((OafEntity) y);
return x;
}
throw new RuntimeException(String.format("MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
throw new RuntimeException(
String.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
return x;
} else if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
} else if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
return (G) y;
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
return x;
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
throw new RuntimeException(
String.format(
"SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
throw new RuntimeException(
String.format(
"SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
}
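A quick sketch of how a strategy is obtained and applied; the two Relation instances here are placeholders:

    import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
    import eu.dnetlib.dhp.schema.oaf.Relation;
    import java.util.function.BiFunction;

    Relation existing = new Relation();
    Relation incoming = new Relation();
    // Obtain the merge function once, then apply it pairwise; for two Relations
    // MERGE_FROM_AND_GET delegates to Relation.mergeFrom and returns the left side.
    SerializableSupplier<BiFunction<Relation, Relation, Relation>> fn =
            MergeAndGet.functionFor(MergeAndGet.Strategy.MERGE_FROM_AND_GET);
    Relation merged = fn.get().apply(existing, incoming);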

View File

@ -1,14 +1,20 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@ -17,33 +23,25 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* Applies a given action payload file to a graph table of a compatible type.
*/
/** Applies a given action payload file to a graph table of a compatible type. */
public class PromoteActionPayloadForGraphTableJob {
private static final Logger logger = LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
private static final Logger logger =
LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
String jsonConfiguration =
IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
Boolean isSparkSessionManaged =
Optional.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputGraphTablePath = parser.get("inputGraphTablePath");
@ -61,11 +59,13 @@ public class PromoteActionPayloadForGraphTableJob {
String outputGraphTablePath = parser.get("outputGraphTablePath");
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
MergeAndGet.Strategy strategy =
MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
logger.info("strategy: {}", strategy);
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
Class<? extends Oaf> actionPayloadClazz =
(Class<? extends Oaf>) Class.forName(actionPayloadClassName);
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
@ -73,10 +73,13 @@ public class PromoteActionPayloadForGraphTableJob {
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
runWithSparkSession(conf, isSparkSessionManaged,
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputGraphTablePath);
promoteActionPayloadForGraphTable(spark,
promoteActionPayloadForGraphTable(
spark,
inputGraphTablePath,
inputActionPayloadPath,
outputGraphTablePath,
@ -86,44 +89,50 @@ public class PromoteActionPayloadForGraphTableJob {
});
}
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) {
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
String msg =
String.format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
throw new RuntimeException(msg);
}
}
private static void removeOutputDir(SparkSession spark,
String path) {
private static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(SparkSession spark,
String inputGraphTablePath,
String inputActionPayloadPath,
String outputGraphTablePath,
MergeAndGet.Strategy strategy,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(
SparkSession spark,
String inputGraphTablePath,
String inputActionPayloadPath,
String outputGraphTablePath,
MergeAndGet.Strategy strategy,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
Dataset<A> actionPayloadDS =
readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
Dataset<G> result = promoteActionPayloadForGraphTable(rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
Dataset<G> result =
promoteActionPayloadForGraphTable(
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
saveGraphTable(result, outputGraphTablePath);
}
private static <G extends Oaf> Dataset<G> readGraphTable(SparkSession spark,
String path,
Class<G> rowClazz) {
private static <G extends Oaf> Dataset<G> readGraphTable(
SparkSession spark, String path, Class<G> rowClazz) {
logger.info("Reading graph table from path: {}", path);
return spark.read()
.textFile(path)
.map((MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz), Encoders.bean(rowClazz));
.map(
(MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz),
Encoders.bean(rowClazz));
/*
return spark
@ -133,33 +142,44 @@ public class PromoteActionPayloadForGraphTableJob {
*/
}
private static <A extends Oaf> Dataset<A> readActionPayload(SparkSession spark,
String path,
Class<A> actionPayloadClazz) {
private static <A extends Oaf> Dataset<A> readActionPayload(
SparkSession spark, String path, Class<A> actionPayloadClazz) {
logger.info("Reading action payload from path: {}", path);
return spark
.read()
return spark.read()
.parquet(path)
.map((MapFunction<Row, A>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"),
actionPayloadClazz), Encoders.bean(actionPayloadClazz));
.map(
(MapFunction<Row, A>)
value ->
OBJECT_MAPPER.readValue(
value.<String>getAs("payload"), actionPayloadClazz),
Encoders.bean(actionPayloadClazz));
}
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(Dataset<G> rowDS,
Dataset<A> actionPayloadDS,
MergeAndGet.Strategy strategy,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
logger.info("Promoting action payload for graph table: payload={}, table={}", actionPayloadClazz.getSimpleName(), rowClazz.getSimpleName());
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
Dataset<G> rowDS,
Dataset<A> actionPayloadDS,
MergeAndGet.Strategy strategy,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
logger.info(
"Promoting action payload for graph table: payload={}, table={}",
actionPayloadClazz.getSimpleName(),
rowClazz.getSimpleName());
SerializableSupplier<Function<G, String>> rowIdFn = PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<Function<A, String>> actionPayloadIdFn = PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
SerializableSupplier<Function<G, String>> rowIdFn =
PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<Function<A, String>> actionPayloadIdFn =
PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn =
MergeAndGet.functionFor(strategy);
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn =
MergeAndGet.functionFor(strategy);
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
SerializableSupplier<Function<G, Boolean>> isNotZeroFn =
PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(
Dataset<G> joinedAndMerged =
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS,
actionPayloadDS,
rowIdFn,
@ -168,14 +188,8 @@ public class PromoteActionPayloadForGraphTableJob {
rowClazz,
actionPayloadClazz);
return PromoteActionPayloadFunctions
.groupGraphTableByIdAndMerge(
joinedAndMerged,
rowIdFn,
mergeRowsAndGetFn,
zeroFn,
isNotZeroFn,
rowClazz);
return PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
}
private static <T extends Oaf> Function<T, String> idFn() {
@ -190,19 +204,49 @@ public class PromoteActionPayloadForGraphTableJob {
private static <T extends Oaf> String idFnForRelation(T t) {
Relation r = (Relation) t;
return Optional.ofNullable(r.getSource())
.map(source -> Optional.ofNullable(r.getTarget())
.map(target -> Optional.ofNullable(r.getRelType())
.map(relType -> Optional.ofNullable(r.getSubRelType())
.map(subRelType -> Optional.ofNullable(r.getRelClass())
.map(relClass -> String.join(source, target, relType, subRelType, relClass))
.orElse(String.join(source, target, relType, subRelType))
)
.orElse(String.join(source, target, relType))
)
.orElse(String.join(source, target))
)
.orElse(source)
)
.map(
source ->
Optional.ofNullable(r.getTarget())
.map(
target ->
Optional.ofNullable(r.getRelType())
.map(
relType ->
Optional.ofNullable(
r
.getSubRelType())
.map(
subRelType ->
Optional
.ofNullable(
r
.getRelClass())
.map(
relClass ->
String
.join(
source,
target,
relType,
subRelType,
relClass))
.orElse(
String
.join(
source,
target,
relType,
subRelType)))
.orElse(
String
.join(
source,
target,
relType)))
.orElse(
String.join(
source, target)))
.orElse(source))
.orElse(null);
}
@ -242,13 +286,8 @@ public class PromoteActionPayloadForGraphTableJob {
};
}
private static <G extends Oaf> void saveGraphTable(Dataset<G> result,
String path) {
private static <G extends Oaf> void saveGraphTable(Dataset<G> result, String path) {
logger.info("Saving graph table to path: {}", path);
result
.toJSON()
.write()
.option("compression", "gzip")
.text(path);
result.toJSON().write().option("compression", "gzip").text(path);
}
}
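For reference, the full argument vector this entry point expects; the flag names are taken from the test near the end of this diff, while the paths and class names are placeholders:

    PromoteActionPayloadForGraphTableJob.main(new String[] {
        "-isSparkSessionManaged", "false",
        "-inputGraphTablePath", "/data/graph/dataset",
        "-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Dataset",
        "-inputActionPayloadPath", "/data/action_payload/dataset",
        "-actionPayloadClassName", "eu.dnetlib.dhp.schema.oaf.Dataset",
        "-outputGraphTablePath", "/data/graph_promoted/dataset",
        "-mergeAndGetStrategy", "MERGE_FROM_AND_GET"
    });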

View File

@ -1,7 +1,13 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
@ -11,95 +17,113 @@ import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.expressions.Aggregator;
import scala.Tuple2;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
/**
* Promote action payload functions.
*/
/** Promote action payload functions. */
public class PromoteActionPayloadFunctions {
private PromoteActionPayloadFunctions() {
}
private PromoteActionPayloadFunctions() {}
/**
* Joins dataset representing graph table with dataset representing action payload using supplied functions.
* Joins dataset representing graph table with dataset representing action payload using
* supplied functions.
*
* @param rowDS Dataset representing graph table
* @param actionPayloadDS Dataset representing action payload
* @param rowIdFn Function used to get the id of graph table row
* @param actionPayloadIdFn Function used to get id of action payload instance
* @param mergeAndGetFn Function used to merge graph table row and action payload instance
* @param rowClazz Class of graph table
* @param rowDS Dataset representing graph table
* @param actionPayloadDS Dataset representing action payload
* @param rowIdFn Function used to get the id of graph table row
* @param actionPayloadIdFn Function used to get id of action payload instance
* @param mergeAndGetFn Function used to merge graph table row and action payload instance
* @param rowClazz Class of graph table
* @param actionPayloadClazz Class of action payload
* @param <G> Type of graph table row
* @param <A> Type of action payload instance
* @param <G> Type of graph table row
* @param <A> Type of action payload instance
* @return Dataset of merged graph table rows and action payload instances
*/
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(Dataset<G> rowDS,
Dataset<A> actionPayloadDS,
SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(
Dataset<G> rowDS,
Dataset<A> actionPayloadDS,
SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
SerializableSupplier<BiFunction<G, A, G>> mergeAndGetFn,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) {
throw new RuntimeException("action payload type must be the same or be a super type of table row type");
throw new RuntimeException(
"action payload type must be the same or be a super type of table row type");
}
Dataset<Tuple2<String, G>> rowWithIdDS = mapToTupleWithId(rowDS, rowIdFn, rowClazz);
Dataset<Tuple2<String, A>> actionPayloadWithIdDS = mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
Dataset<Tuple2<String, A>> actionPayloadWithIdDS =
mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
return rowWithIdDS
.joinWith(actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), "full_outer")
.map((MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
Optional<A> actionPayloadOpt = Optional.ofNullable(value._2()).map(Tuple2::_2);
return rowOpt
.map(row -> actionPayloadOpt
.map(actionPayload -> mergeAndGetFn.get().apply(row, actionPayload))
.orElse(row))
.orElseGet(() -> actionPayloadOpt
.filter(actionPayload -> actionPayload.getClass().equals(rowClazz))
.map(rowClazz::cast)
.orElse(null));
}, Encoders.kryo(rowClazz))
.joinWith(
actionPayloadWithIdDS,
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
"full_outer")
.map(
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>)
value -> {
Optional<G> rowOpt =
Optional.ofNullable(value._1()).map(Tuple2::_2);
Optional<A> actionPayloadOpt =
Optional.ofNullable(value._2()).map(Tuple2::_2);
return rowOpt.map(
row ->
actionPayloadOpt
.map(
actionPayload ->
mergeAndGetFn
.get()
.apply(
row,
actionPayload))
.orElse(row))
.orElseGet(
() ->
actionPayloadOpt
.filter(
actionPayload ->
actionPayload
.getClass()
.equals(
rowClazz))
.map(rowClazz::cast)
.orElse(null));
},
Encoders.kryo(rowClazz))
.filter((FilterFunction<G>) Objects::nonNull);
}
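In words, the full outer join above resolves each id to one of three cases (a compact restatement, for illustration only):

    // For every id present on either side of the full outer join:
    //   row only             -> the row passes through unchanged
    //   row + action payload -> mergeAndGetFn.get().apply(row, payload)
    //   payload only         -> kept only when its concrete class equals the
    //                           graph table row class; otherwise mapped to null,
    //                           which the trailing Objects::nonNull filter drops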
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(Dataset<T> ds,
SerializableSupplier<Function<T, String>> idFn,
Class<T> clazz) {
return ds
.map((MapFunction<T, Tuple2<String, T>>) value -> new Tuple2<>(idFn.get().apply(value), value),
Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(
Dataset<T> ds, SerializableSupplier<Function<T, String>> idFn, Class<T> clazz) {
return ds.map(
(MapFunction<T, Tuple2<String, T>>)
value -> new Tuple2<>(idFn.get().apply(value), value),
Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
}
/**
* Groups graph table by id and aggregates using supplied functions.
*
* @param rowDS Dataset representing graph table
* @param rowIdFn Function used to get the id of graph table row
* @param rowDS Dataset representing graph table
* @param rowIdFn Function used to get the id of graph table row
* @param mergeAndGetFn Function used to merge graph table rows
* @param zeroFn Function to create a zero/empty instance of graph table row
* @param isNotZeroFn Function to check if graph table row is not zero/empty
* @param rowClazz Class of graph table
* @param <G> Type of graph table row
* @param zeroFn Function to create a zero/empty instance of graph table row
* @param isNotZeroFn Function to check if graph table row is not zero/empty
* @param rowClazz Class of graph table
* @param <G> Type of graph table row
* @return Dataset of aggregated graph table rows
*/
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(Dataset<G> rowDS,
SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<G> zeroFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
return rowDS
.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(
Dataset<G> rowDS,
SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<G> zeroFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
TypedColumn<G, G> aggregator =
new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
return rowDS.groupByKey(
(MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
.agg(aggregator)
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
}
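A sketch of calling the aggregation directly; relationDS, the id function, and the non-zero check are all illustrative assumptions:

    // relationDS is assumed to be a Dataset<Relation> already in scope.
    Dataset<Relation> merged =
            PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
                    relationDS,
                    () -> r -> r.getSource() + "|" + r.getTarget(), // id of a graph table row
                    MergeAndGet.functionFor(MergeAndGet.Strategy.MERGE_FROM_AND_GET),
                    Relation::new,                                  // zero/empty instance
                    () -> r -> r.getSource() != null,               // non-zero check
                    Relation.class);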
@ -115,10 +139,11 @@ public class PromoteActionPayloadFunctions {
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private Class<G> rowClazz;
public TableAggregator(SerializableSupplier<G> zeroFn,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
public TableAggregator(
SerializableSupplier<G> zeroFn,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
this.zeroFn = zeroFn;
this.mergeAndGetFn = mergeAndGetFn;
this.isNotZeroFn = isNotZeroFn;
@ -149,7 +174,8 @@ public class PromoteActionPayloadFunctions {
} else if (!isNotZero.apply(left) && isNotZero.apply(right)) {
return right;
}
throw new RuntimeException("internal aggregation error: left and right objects are zero");
throw new RuntimeException(
"internal aggregation error: left and right objects are zero");
}
@Override

View File

@ -1,10 +1,20 @@
package eu.dnetlib.dhp.actionmanager.partition;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import static org.apache.spark.sql.functions.*;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.actionmanager.ISClient;
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest;
import eu.dnetlib.dhp.schema.oaf.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
@ -25,32 +35,23 @@ import org.mockito.junit.jupiter.MockitoExtension;
import scala.Tuple2;
import scala.collection.mutable.Seq;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import static org.apache.spark.sql.functions.*;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
@ExtendWith(MockitoExtension.class)
public class PartitionActionSetsByPayloadTypeJobTest {
private static final ClassLoader cl = PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
private static final ClassLoader cl =
PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
private static Configuration configuration;
private static SparkSession spark;
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())
));
private static final StructType ATOMIC_ACTION_SCHEMA =
StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply(
"clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"payload", DataTypes.StringType, false, Metadata.empty())));
@BeforeAll
public static void beforeAll() throws IOException {
@ -71,11 +72,11 @@ public class PartitionActionSetsByPayloadTypeJobTest {
@Nested
class Main {
@Mock
private ISClient isClient;
@Mock private ISClient isClient;
@Test
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir)
throws Exception {
// given
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
Path outputDir = workingDir.resolve("output");
@ -85,15 +86,16 @@ public class PartitionActionSetsByPayloadTypeJobTest {
List<String> inputActionSetsPaths = resolveInputActionSetPaths(inputActionSetsBaseDir);
// when
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString())).thenReturn(inputActionSetsPaths);
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString()))
.thenReturn(inputActionSetsPaths);
PartitionActionSetsByPayloadTypeJob job = new PartitionActionSetsByPayloadTypeJob();
job.setIsClient(isClient);
job.run(
Boolean.FALSE,
"", // it can be empty we're mocking the response from isClient to resolve the paths
outputDir.toString()
);
"", // it can be empty we're mocking the response from isClient to resolve the
// paths
outputDir.toString());
// then
Files.exists(outputDir);
@ -110,112 +112,134 @@ public class PartitionActionSetsByPayloadTypeJobTest {
}
}
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir) throws IOException {
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir)
throws IOException {
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
return Files
.list(inputActionSetJsonDumpsDir)
.map(path -> {
String inputActionSetId = path.getFileName().toString();
return inputActionSetsBaseDir.resolve(inputActionSetId).toString();
})
return Files.list(inputActionSetJsonDumpsDir)
.map(
path -> {
String inputActionSetId = path.getFileName().toString();
return inputActionSetsBaseDir.resolve(inputActionSetId).toString();
})
.collect(Collectors.toCollection(ArrayList::new));
}
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir) throws IOException {
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir)
throws IOException {
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
Map<String, List<String>> oafsByType = new HashMap<>();
Files
.list(inputActionSetJsonDumpsDir)
.forEach(inputActionSetJsonDumpFile -> {
String inputActionSetId = inputActionSetJsonDumpFile.getFileName().toString();
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId);
Files.list(inputActionSetJsonDumpsDir)
.forEach(
inputActionSetJsonDumpFile -> {
String inputActionSetId =
inputActionSetJsonDumpFile.getFileName().toString();
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId);
Dataset<String> actionDS = readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
.cache();
Dataset<String> actionDS =
readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
.cache();
writeActionsAsJobInput(actionDS, inputActionSetId, inputActionSetDir.toString());
writeActionsAsJobInput(
actionDS, inputActionSetId, inputActionSetDir.toString());
Map<String, List<String>> actionSetOafsByType = actionDS
.withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.select(expr("atomic_action.*"))
.groupBy(col("clazz"))
.agg(collect_list(col("payload")).as("payload_list"))
.collectAsList()
.stream()
.map(row -> new AbstractMap.SimpleEntry<>(row.<String>getAs("clazz"),
mutableSeqAsJavaList(row.<Seq<String>>getAs("payload_list"))))
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
Map<String, List<String>> actionSetOafsByType =
actionDS
.withColumn(
"atomic_action",
from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.select(expr("atomic_action.*")).groupBy(col("clazz"))
.agg(collect_list(col("payload")).as("payload_list"))
.collectAsList().stream()
.map(
row ->
new AbstractMap.SimpleEntry<>(
row.<String>getAs("clazz"),
mutableSeqAsJavaList(
row.<Seq<String>>getAs(
"payload_list"))))
.collect(
Collectors.toMap(
AbstractMap.SimpleEntry::getKey,
AbstractMap.SimpleEntry::getValue));
actionSetOafsByType.keySet()
.forEach(x -> {
if (oafsByType.containsKey(x)) {
List<String> collected = new ArrayList<>();
collected.addAll(oafsByType.get(x));
collected.addAll(actionSetOafsByType.get(x));
oafsByType.put(x, collected);
} else {
oafsByType.put(x, actionSetOafsByType.get(x));
}
});
});
actionSetOafsByType
.keySet()
.forEach(
x -> {
if (oafsByType.containsKey(x)) {
List<String> collected = new ArrayList<>();
collected.addAll(oafsByType.get(x));
collected.addAll(actionSetOafsByType.get(x));
oafsByType.put(x, collected);
} else {
oafsByType.put(x, actionSetOafsByType.get(x));
}
});
});
return oafsByType;
}
private static Path getInputActionSetJsonDumpsDir() {
return Paths
.get(Objects.requireNonNull(cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
return Paths.get(
Objects.requireNonNull(
cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
.getFile());
}
private static Dataset<String> readActionsFromJsonDump(String path) {
return spark
.read()
.textFile(path);
return spark.read().textFile(path);
}
private static void writeActionsAsJobInput(Dataset<String> actionDS,
String inputActionSetId,
String path) {
actionDS
.javaRDD()
private static void writeActionsAsJobInput(
Dataset<String> actionDS, String inputActionSetId, String path) {
actionDS.javaRDD()
.mapToPair(json -> new Tuple2<>(new Text(inputActionSetId), new Text(json)))
.saveAsNewAPIHadoopFile(path,
.saveAsNewAPIHadoopFile(
path,
Text.class,
Text.class,
SequenceFileOutputFormat.class,
configuration);
}
private static <T extends Oaf> void assertForOafType(Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
Path outputDatasetDir = outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
private static <T extends Oaf> void assertForOafType(
Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
Path outputDatasetDir =
outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
Files.exists(outputDatasetDir);
List<T> actuals = readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
List<T> actuals =
readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
actuals.sort(Comparator.comparingInt(Object::hashCode));
List<T> expecteds = oafsByClassName.get(clazz.getCanonicalName()).stream()
.map(json -> mapToOaf(json, clazz))
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
List<T> expecteds =
oafsByClassName.get(clazz.getCanonicalName()).stream()
.map(json -> mapToOaf(json, clazz))
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
assertIterableEquals(expecteds, actuals);
}
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(String path,
Class<T> clazz) {
return spark
.read()
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(
String path, Class<T> clazz) {
return spark.read()
.parquet(path)
.map((MapFunction<Row, T>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"), clazz),
.map(
(MapFunction<Row, T>)
value ->
OBJECT_MAPPER.readValue(
value.<String>getAs("payload"), clazz),
Encoders.bean(clazz));
}
private static <T extends Oaf> T mapToOaf(String json, Class<T> clazz) {
return rethrowAsRuntimeException(
() -> OBJECT_MAPPER.readValue(json, clazz),
String.format("failed to map json to class: json=%s, class=%s", json, clazz.getCanonicalName())
);
String.format(
"failed to map json to class: json=%s, class=%s",
json, clazz.getCanonicalName()));
}
}

View File

@ -1,17 +1,16 @@
package eu.dnetlib.dhp.actionmanager.promote;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.function.BiFunction;
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.Strategy;
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.functionFor;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.function.BiFunction;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
public class MergeAndGetTest {
@Nested
@ -24,11 +23,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -38,11 +37,11 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -52,11 +51,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -66,11 +65,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -80,11 +79,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -94,7 +93,8 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
Oaf x = fn.get().apply(a, b);
@ -110,11 +110,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -124,30 +124,28 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given
class OafEntitySub1 extends OafEntity {
}
class OafEntitySub2 extends OafEntity {
}
class OafEntitySub1 extends OafEntity {}
class OafEntitySub2 extends OafEntity {}
OafEntitySub1 a = mock(OafEntitySub1.class);
OafEntitySub2 b = mock(OafEntitySub2.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -157,7 +155,8 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
Oaf x = fn.get().apply(a, b);
@ -177,11 +176,11 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -191,11 +190,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -205,28 +204,29 @@ public class MergeAndGetTest {
Result b = mock(Result.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
// given
// real types must be used because subclass-superclass resolution does not work for mocks
// real types must be used because subclass-superclass resolution does not work for
// mocks
Dataset a = new Dataset();
a.setLastupdatetimestamp(1L);
Result b = new Result();
b.setLastupdatetimestamp(2L);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
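Read together, the hunks above pin down the MergeAndGet contract: both strategies throw for incompatible pairs, and SELECT_NEWER_AND_GET resolves same-type entities by lastupdatetimestamp. A minimal usage sketch, assuming the statically imported functionFor in these tests is MergeAndGet.functionFor:

import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.function.BiFunction;

public class SelectNewerSketch {
    public static void main(String[] args) {
        SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
                MergeAndGet.functionFor(MergeAndGet.Strategy.SELECT_NEWER_AND_GET);
        Dataset older = new Dataset();
        older.setLastupdatetimestamp(1L);
        Dataset newer = new Dataset();
        newer.setLastupdatetimestamp(2L);
        // Same concrete type on both sides, so no RuntimeException is expected:
        // the strategy should hand back the entity with the larger timestamp.
        Oaf winner = fn.get().apply(older, newer);
        System.out.println(winner == newer); // expected: true
    }
}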
View File
@ -1,8 +1,20 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
@ -14,21 +26,9 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
public class PromoteActionPayloadForGraphTableJobTest {
private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
private static final ClassLoader cl =
PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
private static SparkSession spark;
@ -52,7 +52,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
@BeforeEach
public void beforeEach() throws IOException {
workingDir = Files.createTempDirectory(PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
workingDir =
Files.createTempDirectory(
PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
inputDir = workingDir.resolve("input");
inputGraphRootDir = inputDir.resolve("graph");
inputActionPayloadRootDir = inputDir.resolve("action_payload");
@ -80,87 +82,130 @@ public class PromoteActionPayloadForGraphTableJobTest {
Class<OafEntity> actionPayloadClazz = OafEntity.class;
// when
RuntimeException exception = assertThrows(RuntimeException.class, () ->
PromoteActionPayloadForGraphTableJob.main(new String[]{
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-inputGraphTablePath", "",
"-graphTableClassName", rowClazz.getCanonicalName(),
"-inputActionPayloadPath", "",
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath", "",
"-mergeAndGetStrategy", MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name()
}));
RuntimeException exception =
assertThrows(
RuntimeException.class,
() ->
PromoteActionPayloadForGraphTableJob.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-inputGraphTablePath", "",
"-graphTableClassName", rowClazz.getCanonicalName(),
"-inputActionPayloadPath", "",
"-actionPayloadClassName",
actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath", "",
"-mergeAndGetStrategy",
MergeAndGet.Strategy.SELECT_NEWER_AND_GET
.name()
}));
// then
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
String msg =
String.format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
assertTrue(exception.getMessage().contains(msg));
}
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable(MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) throws Exception {
@MethodSource(
"eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable(
MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz)
throws Exception {
// given
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
Path inputActionPayloadDir =
createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
Path outputGraphTableDir =
outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
// when
PromoteActionPayloadForGraphTableJob.main(new String[]{
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-inputGraphTablePath", inputGraphTableDir.toString(),
"-graphTableClassName", rowClazz.getCanonicalName(),
"-inputActionPayloadPath", inputActionPayloadDir.toString(),
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath", outputGraphTableDir.toString(),
"-mergeAndGetStrategy", strategy.name()
});
PromoteActionPayloadForGraphTableJob.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-inputGraphTablePath", inputGraphTableDir.toString(),
"-graphTableClassName", rowClazz.getCanonicalName(),
"-inputActionPayloadPath", inputActionPayloadDir.toString(),
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath", outputGraphTableDir.toString(),
"-mergeAndGetStrategy", strategy.name()
});
// then
assertTrue(Files.exists(outputGraphTableDir));
List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
.collectAsList()
.stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
String expectedOutputGraphTableJsonDumpPath = resultFileLocation(strategy, rowClazz, actionPayloadClazz);
Path expectedOutputGraphTableJsonDumpFile = Paths
.get(Objects.requireNonNull(cl.getResource(expectedOutputGraphTableJsonDumpPath)).getFile());
List<? extends Oaf> expectedOutputRows = readGraphTableFromJsonDump(expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
.collectAsList()
.stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
List<? extends Oaf> actualOutputRows =
readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
.collectAsList().stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
String expectedOutputGraphTableJsonDumpPath =
resultFileLocation(strategy, rowClazz, actionPayloadClazz);
Path expectedOutputGraphTableJsonDumpFile =
Paths.get(
Objects.requireNonNull(
cl.getResource(expectedOutputGraphTableJsonDumpPath))
.getFile());
List<? extends Oaf> expectedOutputRows =
readGraphTableFromJsonDump(
expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
.collectAsList().stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
assertIterableEquals(expectedOutputRows, actualOutputRows);
}
}
public static Stream<Arguments> promoteJobTestParams() {
return Stream.of(
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Dataset.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Datasource.class, Datasource.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Organization.class, Organization.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, OtherResearchProduct.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, Result.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
eu.dnetlib.dhp.schema.oaf.Dataset.class,
eu.dnetlib.dhp.schema.oaf.Dataset.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
eu.dnetlib.dhp.schema.oaf.Dataset.class,
eu.dnetlib.dhp.schema.oaf.Result.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Datasource.class,
Datasource.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Organization.class,
Organization.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
OtherResearchProduct.class,
OtherResearchProduct.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
OtherResearchProduct.class,
Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Project.class, Project.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Publication.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Publication.class,
Publication.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Relation.class, Relation.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Software.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class)
);
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class));
}
private static <G extends Oaf> Path createGraphTable(Path inputGraphRootDir,
Class<G> rowClazz) {
private static <G extends Oaf> Path createGraphTable(
Path inputGraphRootDir, Class<G> rowClazz) {
String inputGraphTableJsonDumpPath = inputGraphTableJsonDumpLocation(rowClazz);
Path inputGraphTableJsonDumpFile = Paths
.get(Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath)).getFile());
Dataset<G> rowDS = readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
Path inputGraphTableJsonDumpFile =
Paths.get(
Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath))
.getFile());
Dataset<G> rowDS =
readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
String inputGraphTableName = rowClazz.getSimpleName().toLowerCase();
Path inputGraphTableDir = inputGraphRootDir.resolve(inputGraphTableName);
writeGraphTableAaJobInput(rowDS, inputGraphTableDir.toString());
@ -169,71 +214,74 @@ public class PromoteActionPayloadForGraphTableJobTest {
private static String inputGraphTableJsonDumpLocation(Class<? extends Oaf> rowClazz) {
return String.format(
"%s/%s.json", "eu/dnetlib/dhp/actionmanager/promote/input/graph", rowClazz.getSimpleName().toLowerCase());
"%s/%s.json",
"eu/dnetlib/dhp/actionmanager/promote/input/graph",
rowClazz.getSimpleName().toLowerCase());
}
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(String path,
Class<G> rowClazz) {
return spark
.read()
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(
String path, Class<G> rowClazz) {
return spark.read()
.textFile(path)
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz));
.map(
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
Encoders.bean(rowClazz));
}
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS,
String path) {
rowDS
.write()
.option("compression", "gzip")
.json(path);
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS, String path) {
rowDS.write().option("compression", "gzip").json(path);
}
private static <G extends Oaf, A extends Oaf> Path createActionPayload(Path inputActionPayloadRootDir,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
String inputActionPayloadJsonDumpPath = inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
Path inputActionPayloadJsonDumpFile = Paths
.get(Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath)).getFile());
Dataset<String> actionPayloadDS = readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
Path inputActionPayloadDir = inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
private static <G extends Oaf, A extends Oaf> Path createActionPayload(
Path inputActionPayloadRootDir, Class<G> rowClazz, Class<A> actionPayloadClazz) {
String inputActionPayloadJsonDumpPath =
inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
Path inputActionPayloadJsonDumpFile =
Paths.get(
Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath))
.getFile());
Dataset<String> actionPayloadDS =
readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
Path inputActionPayloadDir =
inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
writeActionPayloadAsJobInput(actionPayloadDS, inputActionPayloadDir.toString());
return inputActionPayloadDir;
}
private static String inputActionPayloadJsonDumpLocation(Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
private static String inputActionPayloadJsonDumpLocation(
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
return String.format("eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase());
return String.format(
"eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
rowClazz.getSimpleName().toLowerCase(),
actionPayloadClazz.getSimpleName().toLowerCase());
}
private static Dataset<String> readActionPayloadFromJsonDump(String path) {
return spark
.read()
.textFile(path);
return spark.read().textFile(path);
}
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS,
String path) {
actionPayloadDS
.withColumnRenamed("value", "payload")
.write()
.parquet(path);
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS, String path) {
actionPayloadDS.withColumnRenamed("value", "payload").write().parquet(path);
}
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(String path,
Class<G> rowClazz) {
return spark
.read()
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(
String path, Class<G> rowClazz) {
return spark.read()
.textFile(path)
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz));
.map(
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
Encoders.bean(rowClazz));
}
private static String resultFileLocation(MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
return String
.format("eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
strategy.name().toLowerCase(), rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase());
private static String resultFileLocation(
MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
return String.format(
"eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
strategy.name().toLowerCase(),
rowClazz.getSimpleName().toLowerCase(),
actionPayloadClazz.getSimpleName().toLowerCase());
}
}
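For reference, the job under test takes exactly the flags assembled above; a sketch invoking it directly, with hypothetical local paths standing in for the real workflow parameters:

// Sketch: the argument list mirrors the test above; the three paths are hypothetical.
PromoteActionPayloadForGraphTableJob.main(
        new String[] {
            "-isSparkSessionManaged", Boolean.FALSE.toString(),
            "-inputGraphTablePath", "/tmp/input/graph/dataset",
            "-graphTableClassName", "eu.dnetlib.dhp.schema.oaf.Dataset",
            "-inputActionPayloadPath", "/tmp/input/action_payload",
            "-actionPayloadClassName", "eu.dnetlib.dhp.schema.oaf.Result",
            "-outputGraphTablePath", "/tmp/output/graph/dataset",
            "-mergeAndGetStrategy", MergeAndGet.Strategy.MERGE_FROM_AND_GET.name()
        });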
View File
@ -1,7 +1,15 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@ -11,15 +19,6 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
public class PromoteActionPayloadFunctionsTest {
private static SparkSession spark;
@ -44,20 +43,20 @@ public class PromoteActionPayloadFunctionsTest {
@Test
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
// given
class OafImpl extends Oaf {
}
class OafImpl extends Oaf {}
// when
assertThrows(RuntimeException.class, () ->
PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(null,
assertThrows(
RuntimeException.class,
() ->
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
null,
null,
null,
null,
null,
OafImplSubSub.class,
OafImpl.class
));
OafImpl.class));
}
@Test
@ -68,40 +67,53 @@ public class PromoteActionPayloadFunctionsTest {
String id2 = "id2";
String id3 = "id3";
String id4 = "id4";
List<OafImplSubSub> rowData = Arrays.asList(
createOafImplSubSub(id0),
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id3)
);
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> rowData =
Arrays.asList(
createOafImplSubSub(id0),
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> actionPayloadData = Arrays.asList(
createOafImplSubSub(id1),
createOafImplSubSub(id2), createOafImplSubSub(id2),
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3),
createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4)
);
Dataset<OafImplSubSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> actionPayloadData =
Arrays.asList(
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id2),
createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id4),
createOafImplSubSub(id4),
createOafImplSubSub(id4),
createOafImplSubSub(id4));
Dataset<OafImplSubSub> actionPayloadDS =
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
x.merge(y);
return x;
};
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
// when
List<OafImplSubSub> results = PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(rowDS,
actionPayloadDS,
rowIdFn,
actionPayloadIdFn,
mergeAndGetFn,
OafImplSubSub.class,
OafImplSubSub.class
)
.collectAsList();
List<OafImplSubSub> results =
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS,
actionPayloadDS,
rowIdFn,
actionPayloadIdFn,
mergeAndGetFn,
OafImplSubSub.class,
OafImplSubSub.class)
.collectAsList();
// then
assertEquals(11, results.size());
@ -111,23 +123,24 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
assertEquals(4, results.stream().filter(x -> x.getId().equals(id4)).count());
results.forEach(result -> {
switch (result.getId()) {
case "id0":
assertEquals(1, result.getMerged());
break;
case "id1":
case "id2":
case "id3":
assertEquals(2, result.getMerged());
break;
case "id4":
assertEquals(1, result.getMerged());
break;
default:
throw new RuntimeException();
}
});
results.forEach(
result -> {
switch (result.getId()) {
case "id0":
assertEquals(1, result.getMerged());
break;
case "id1":
case "id2":
case "id3":
assertEquals(2, result.getMerged());
break;
case "id4":
assertEquals(1, result.getMerged());
break;
default:
throw new RuntimeException();
}
});
}
@Test
@ -138,40 +151,53 @@ public class PromoteActionPayloadFunctionsTest {
String id2 = "id2";
String id3 = "id3";
String id4 = "id4";
List<OafImplSubSub> rowData = Arrays.asList(
createOafImplSubSub(id0),
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id3)
);
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> rowData =
Arrays.asList(
createOafImplSubSub(id0),
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSub> actionPayloadData = Arrays.asList(
createOafImplSub(id1),
createOafImplSub(id2), createOafImplSub(id2),
createOafImplSub(id3), createOafImplSub(id3), createOafImplSub(id3),
createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4)
);
Dataset<OafImplSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
List<OafImplSub> actionPayloadData =
Arrays.asList(
createOafImplSub(id1),
createOafImplSub(id2),
createOafImplSub(id2),
createOafImplSub(id3),
createOafImplSub(id3),
createOafImplSub(id3),
createOafImplSub(id4),
createOafImplSub(id4),
createOafImplSub(id4),
createOafImplSub(id4));
Dataset<OafImplSub> actionPayloadDS =
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
x.merge(y);
return x;
};
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>>
mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
// when
List<OafImplSubSub> results = PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(rowDS,
actionPayloadDS,
rowIdFn,
actionPayloadIdFn,
mergeAndGetFn,
OafImplSubSub.class,
OafImplSub.class
)
.collectAsList();
List<OafImplSubSub> results =
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS,
actionPayloadDS,
rowIdFn,
actionPayloadIdFn,
mergeAndGetFn,
OafImplSubSub.class,
OafImplSub.class)
.collectAsList();
// then
assertEquals(7, results.size());
@ -181,22 +207,22 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
assertEquals(0, results.stream().filter(x -> x.getId().equals(id4)).count());
results.forEach(result -> {
switch (result.getId()) {
case "id0":
assertEquals(1, result.getMerged());
break;
case "id1":
case "id2":
case "id3":
assertEquals(2, result.getMerged());
break;
default:
throw new RuntimeException();
}
});
results.forEach(
result -> {
switch (result.getId()) {
case "id0":
assertEquals(1, result.getMerged());
break;
case "id1":
case "id2":
case "id3":
assertEquals(2, result.getMerged());
break;
default:
throw new RuntimeException();
}
});
}
}
@Nested
@ -208,30 +234,40 @@ public class PromoteActionPayloadFunctionsTest {
String id1 = "id1";
String id2 = "id2";
String id3 = "id3";
List<OafImplSubSub> rowData = Arrays.asList(
createOafImplSubSub(id1),
createOafImplSubSub(id2), createOafImplSubSub(id2),
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3)
);
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> rowData =
Arrays.asList(
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id2),
createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
x.merge(y);
return x;
};
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
SerializableSupplier<OafImplSubSub> zeroFn = OafImplSubSub::new;
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn = () -> x -> Objects.nonNull(x.getId());
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn =
() -> x -> Objects.nonNull(x.getId());
// when
List<OafImplSubSub> results = PromoteActionPayloadFunctions
.groupGraphTableByIdAndMerge(rowDS,
rowIdFn,
mergeAndGetFn,
zeroFn,
isNotZeroFn,
OafImplSubSub.class)
.collectAsList();
List<OafImplSubSub> results =
PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
rowDS,
rowIdFn,
mergeAndGetFn,
zeroFn,
isNotZeroFn,
OafImplSubSub.class)
.collectAsList();
// then
assertEquals(3, results.size());
@ -239,23 +275,23 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(1, results.stream().filter(x -> x.getId().equals(id2)).count());
assertEquals(1, results.stream().filter(x -> x.getId().equals(id3)).count());
results.forEach(result -> {
switch (result.getId()) {
case "id1":
assertEquals(1, result.getMerged());
break;
case "id2":
assertEquals(2, result.getMerged());
break;
case "id3":
assertEquals(3, result.getMerged());
break;
default:
throw new RuntimeException();
}
});
results.forEach(
result -> {
switch (result.getId()) {
case "id1":
assertEquals(1, result.getMerged());
break;
case "id2":
assertEquals(2, result.getMerged());
break;
case "id3":
assertEquals(3, result.getMerged());
break;
default:
throw new RuntimeException();
}
});
}
}
public static class OafImplRoot extends Oaf {
@ -310,5 +346,4 @@ public class PromoteActionPayloadFunctionsTest {
x.setId(id);
return x;
}
}
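The expected sizes in the two join tests follow directly from the fixtures: each graph row is merged once per same-id payload, an unmatched graph row (id0) passes through once, and payload-only ids (id4) survive only when the payload type can be promoted to the graph row type. A sketch of that arithmetic, assuming the fixture counts above:

// graph rows: id0..id3 once each; payloads: id1 x1, id2 x2, id3 x3, id4 x4
int matchedPairs = 1 * 1 + 1 * 2 + 1 * 3; // id1, id2, id3
int unmatchedGraphRows = 1;               // id0 has no payload
int payloadOnlyRows = 4;                  // id4 has no graph row
int sameTypeJoin = unmatchedGraphRows + matchedPairs + payloadOnlyRows; // = 11
int superTypePayloadJoin = unmatchedGraphRows + matchedPairs;           // = 7, id4 dropped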
View File
@ -1,10 +1,10 @@
{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"}
{"collectedFrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::d0bbea1f5bed5864d1904eb602e608a6"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|OpenstarTs__::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::fc7459b8fed8c0d47947fe04275251c0"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|NARCIS__cris::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::c978e29d3b2ddf4f0c2b6e60d6613426"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":true,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::b58bdbe8ae5acead04fc76777d2f8017"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|MetisRadboud::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::8de0f5a712997aafe0d794a53e51b75a"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|UnityFVG____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::89bab7c5a227fc27b2b9cadf475a6b71"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|4ScienceCRIS::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::007a4870b31056f89b768cf508e1538e"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|VTTRsInSsCrs::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|openaire____::735915884eb439d42953372eaf934782"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":true,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556","subRelType":"provision","target":"20|dedup_wf_001::9ea9c0996c87e1dc7fc69f94b5ed0010"}
{"collectedfrom":[],"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"UNKNOWN","classname":"UNKNOWN","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"},"lastupdatetimestamp":0,"relClass":"provides","relType":"datasourceOrganization","source":"10|CSC_________::a2b9ce8435390bcbfc05f3cae3948747","subRelType":"provision","target":"20|openaire____::c24a458004a31f9687089ea3d249de51"}
View File
@ -7,6 +7,11 @@ import eu.dnetlib.dhp.model.mdstore.Provenance;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.cli.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -24,94 +29,135 @@ import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
public class GenerateNativeStoreSparkJob {
public static MetadataRecord parseRecord(
final String input,
final String xpath,
final String encoding,
final Provenance provenance,
final Long dateOfCollection,
final LongAccumulator totalItems,
final LongAccumulator invalidRecords) {
public static MetadataRecord parseRecord (final String input, final String xpath, final String encoding, final Provenance provenance, final Long dateOfCollection, final LongAccumulator totalItems, final LongAccumulator invalidRecords) {
if(totalItems != null)
totalItems.add(1);
if (totalItems != null) totalItems.add(1);
try {
SAXReader reader = new SAXReader();
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Document document =
reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Node node = document.selectSingleNode(xpath);
final String originalIdentifier = node.getText();
if (StringUtils.isBlank(originalIdentifier)) {
if (invalidRecords!= null)
invalidRecords.add(1);
if (invalidRecords != null) invalidRecords.add(1);
return null;
}
return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection);
return new MetadataRecord(
originalIdentifier, encoding, provenance, input, dateOfCollection);
} catch (Throwable e) {
if (invalidRecords!= null)
invalidRecords.add(1);
if (invalidRecords != null) invalidRecords.add(1);
e.printStackTrace();
return null;
}
}
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(GenerateNativeStoreSparkJob.class.getResourceAsStream("/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
GenerateNativeStoreSparkJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
parser.parseArgument(args);
final ObjectMapper jsonMapper = new ObjectMapper();
final Provenance provenance = jsonMapper.readValue(parser.get("provenance"), Provenance.class);
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
final ObjectMapper jsonMapper = new ObjectMapper();
final Provenance provenance =
jsonMapper.readValue(parser.get("provenance"), Provenance.class);
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
final SparkSession spark = SparkSession
.builder()
.appName("GenerateNativeStoreSparkJob")
.master(parser.get("master"))
.getOrCreate();
final SparkSession spark =
SparkSession.builder()
.appName("GenerateNativeStoreSparkJob")
.master(parser.get("master"))
.getOrCreate();
final Map<String, String> ongoingMap = new HashMap<>();
final Map<String, String> reportMap = new HashMap<>();
final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
final boolean test =
parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final JavaPairRDD<IntWritable, Text> inputRDD = sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
final JavaPairRDD<IntWritable, Text> inputRDD =
sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
final MessageManager manager = new MessageManager(parser.get("rabbitHost"), parser.get("rabbitUser"), parser.get("rabbitPassword"), false, false, null);
final MessageManager manager =
new MessageManager(
parser.get("rabbitHost"),
parser.get("rabbitUser"),
parser.get("rabbitPassword"),
false,
false,
null);
final JavaRDD<MetadataRecord> mappeRDD = inputRDD.map(item -> parseRecord(item._2().toString(), parser.get("xpath"), parser.get("encoding"), provenance, dateOfCollection, totalItems, invalidRecords))
.filter(Objects::nonNull).distinct();
final JavaRDD<MetadataRecord> mappeRDD =
inputRDD.map(
item ->
parseRecord(
item._2().toString(),
parser.get("xpath"),
parser.get("encoding"),
provenance,
dateOfCollection,
totalItems,
invalidRecords))
.filter(Objects::nonNull)
.distinct();
ongoingMap.put("ongoing", "0");
if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false);
manager.sendMessage(
new Message(
parser.get("workflowId"),
"DataFrameCreation",
MessageType.ONGOING,
ongoingMap),
parser.get("rabbitOngoingQueue"),
true,
false);
}
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
mdStoreRecords.add(mdstore.count());
ongoingMap.put("ongoing", ""+ totalItems.value());
ongoingMap.put("ongoing", "" + totalItems.value());
if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false);
manager.sendMessage(
new Message(
parser.get("workflowId"),
"DataFrameCreation",
MessageType.ONGOING,
ongoingMap),
parser.get("rabbitOngoingQueue"),
true,
false);
}
mdstore.write().format("parquet").save(parser.get("output"));
reportMap.put("inputItem" , ""+ totalItems.value());
reportMap.put("inputItem", "" + totalItems.value());
reportMap.put("invalidRecords", "" + invalidRecords.value());
reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), parser.get("rabbitReportQueue"), true, false);
manager.sendMessage(
new Message(
parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
parser.get("rabbitReportQueue"),
true,
false);
manager.close();
}
}
}
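Since every accumulator use in parseRecord is null-guarded, the parser can be exercised on its own outside Spark; a minimal sketch with an illustrative record and xpath:

// Sketch: accumulators passed as null, as the null checks above permit.
final String xml =
        "<record><header><identifier>oai:example.org:42</identifier></header></record>";
final MetadataRecord rec =
        GenerateNativeStoreSparkJob.parseRecord(
                xml,
                "//*[local-name()='identifier']", // xpath selecting the original identifier
                "XML",
                null, // provenance omitted in this sketch
                System.currentTimeMillis(),
                null,
                null);
// rec is null when the xpath yields a blank identifier or the input fails to parse.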
View File
@ -2,10 +2,9 @@ package eu.dnetlib.dhp.collection.plugin;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.stream.Stream;
public interface CollectorPlugin {
Stream<String> collect(ApiDescriptor api) throws DnetCollectorException;
Stream<String> collect(ApiDescriptor api) throws DnetCollectorException;
}
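The contract is deliberately small: given an ApiDescriptor, yield a lazy Stream of raw record payloads. A hypothetical implementation, for illustration only:

import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.stream.Stream;

// Hypothetical plugin, not part of the codebase: it only illustrates the contract.
public class StaticRecordPlugin implements CollectorPlugin {
    @Override
    public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException {
        if (api.getBaseUrl() == null || api.getBaseUrl().isEmpty()) {
            throw new DnetCollectorException("Param 'baseurl' is null or empty");
        }
        return Stream.of("<record/>"); // a real plugin would page through the endpoint
    }
}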
View File
@ -1,5 +1,11 @@
package eu.dnetlib.dhp.collection.plugin.oai;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@ -7,62 +13,70 @@ import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
public class OaiCollectorPlugin implements CollectorPlugin {
private static final String FORMAT_PARAM = "format";
private static final String OAI_SET_PARAM = "set";
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
private static final String FORMAT_PARAM = "format";
private static final String OAI_SET_PARAM = "set";
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
private OaiIteratorFactory oaiIteratorFactory;
private OaiIteratorFactory oaiIteratorFactory;
@Override
public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException {
final String baseUrl = api.getBaseUrl();
final String mdFormat = api.getParams().get(FORMAT_PARAM);
final String setParam = api.getParams().get(OAI_SET_PARAM);
final String fromDate = api.getParams().get(OAI_FROM_DATE_PARAM);
final String untilDate = api.getParams().get(OAI_UNTIL_DATE_PARAM);
@Override
public Stream<String> collect(final ApiDescriptor api) throws DnetCollectorException {
final String baseUrl = api.getBaseUrl();
final String mdFormat = api.getParams().get(FORMAT_PARAM);
final String setParam = api.getParams().get(OAI_SET_PARAM);
final String fromDate = api.getParams().get(OAI_FROM_DATE_PARAM);
final String untilDate = api.getParams().get(OAI_UNTIL_DATE_PARAM);
final List<String> sets = new ArrayList<>();
if (setParam != null) {
sets.addAll(
Lists.newArrayList(
Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
}
if (sets.isEmpty()) {
// If no set is defined, ALL the sets must be harvested
sets.add("");
}
final List<String> sets = new ArrayList<>();
if (setParam != null) {
sets.addAll(Lists.newArrayList(Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
}
if (sets.isEmpty()) {
// If no set is defined, ALL the sets must be harvested
sets.add("");
}
if (baseUrl == null || baseUrl.isEmpty()) {
throw new DnetCollectorException("Param 'baseurl' is null or empty");
}
if (baseUrl == null || baseUrl.isEmpty()) { throw new DnetCollectorException("Param 'baseurl' is null or empty"); }
if (mdFormat == null || mdFormat.isEmpty()) {
throw new DnetCollectorException("Param 'mdFormat' is null or empty");
}
if (mdFormat == null || mdFormat.isEmpty()) { throw new DnetCollectorException("Param 'mdFormat' is null or empty"); }
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
}
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate); }
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
}
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate); }
final Iterator<Iterator<String>> iters =
sets.stream()
.map(
set ->
getOaiIteratorFactory()
.newIterator(
baseUrl, mdFormat, set, fromDate,
untilDate))
.iterator();
final Iterator<Iterator<String>> iters = sets.stream()
.map(set -> getOaiIteratorFactory().newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
.iterator();
return StreamSupport.stream(
Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED),
false);
}
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED), false);
}
public OaiIteratorFactory getOaiIteratorFactory() {
if (oaiIteratorFactory == null){
oaiIteratorFactory = new OaiIteratorFactory();
}
return oaiIteratorFactory;
}
public OaiIteratorFactory getOaiIteratorFactory() {
if (oaiIteratorFactory == null) {
oaiIteratorFactory = new OaiIteratorFactory();
}
return oaiIteratorFactory;
}
}
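A usage sketch for the plugin above; it assumes ApiDescriptor exposes a base-URL setter and a mutable params map mirroring the getters used in collect (both assumptions):

// Sketch with a hypothetical endpoint; keys match FORMAT_PARAM, OAI_SET_PARAM, etc.
final ApiDescriptor api = new ApiDescriptor();
api.setBaseUrl("https://example.org/oai");     // assumed setter
api.getParams().put("format", "oai_dc");
api.getParams().put("set", "setA,setB");       // split on commas into two harvests
api.getParams().put("fromDate", "2020-01-01"); // must match \d{4}-\d{2}-\d{2}
final Stream<String> records = new OaiCollectorPlugin().collect(api);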
View File
@ -1,15 +1,14 @@
package eu.dnetlib.dhp.collection.plugin.oai;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -18,146 +17,163 @@ import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
public class OaiIterator implements Iterator<String> {
private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
private static final Log log =
LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
private final Queue<String> queue = new PriorityBlockingQueue<>();
private final SAXReader reader = new SAXReader();
private final Queue<String> queue = new PriorityBlockingQueue<>();
private final SAXReader reader = new SAXReader();
private final String baseUrl;
private final String set;
private final String mdFormat;
private final String fromDate;
private final String untilDate;
private String token;
private boolean started;
private final HttpConnector httpConnector;
private final String baseUrl;
private final String set;
private final String mdFormat;
private final String fromDate;
private final String untilDate;
private String token;
private boolean started;
private final HttpConnector httpConnector;
public OaiIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate,
final HttpConnector httpConnector) {
this.baseUrl = baseUrl;
this.mdFormat = mdFormat;
this.set = set;
this.fromDate = fromDate;
this.untilDate = untilDate;
this.started = false;
this.httpConnector = httpConnector;
}
public OaiIterator(
final String baseUrl,
final String mdFormat,
final String set,
final String fromDate,
final String untilDate,
final HttpConnector httpConnector) {
this.baseUrl = baseUrl;
this.mdFormat = mdFormat;
this.set = set;
this.fromDate = fromDate;
this.untilDate = untilDate;
this.started = false;
this.httpConnector = httpConnector;
}
private void verifyStarted() {
if (!this.started) {
this.started = true;
try {
this.token = firstPage();
} catch (final DnetCollectorException e) {
throw new RuntimeException(e);
}
}
}
private void verifyStarted() {
if (!this.started) {
this.started = true;
try {
this.token = firstPage();
} catch (final DnetCollectorException e) {
throw new RuntimeException(e);
}
}
}
@Override
public boolean hasNext() {
synchronized (queue) {
verifyStarted();
return !queue.isEmpty();
}
}
@Override
public boolean hasNext() {
synchronized (queue) {
verifyStarted();
return !queue.isEmpty();
}
}
@Override
public String next() {
synchronized (queue) {
verifyStarted();
final String res = queue.poll();
while (queue.isEmpty() && token != null && !token.isEmpty()) {
try {
token = otherPages(token);
} catch (final DnetCollectorException e) {
throw new RuntimeException(e);
}
}
return res;
}
}
@Override
public String next() {
synchronized (queue) {
verifyStarted();
final String res = queue.poll();
while (queue.isEmpty() && token != null && !token.isEmpty()) {
try {
token = otherPages(token);
} catch (final DnetCollectorException e) {
throw new RuntimeException(e);
}
}
return res;
}
}
@Override
public void remove() {}
@Override
public void remove() {}
private String firstPage() throws DnetCollectorException {
try {
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8");
if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8");
}
if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
}
if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
}
log.info("Start harvesting using url: " + url);
private String firstPage() throws DnetCollectorException {
try {
String url =
baseUrl
+ "?verb=ListRecords&metadataPrefix="
+ URLEncoder.encode(mdFormat, "UTF-8");
if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8");
}
if (fromDate != null && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
}
if (untilDate != null && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
}
log.info("Start harvesting using url: " + url);
return downloadPage(url);
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
}
return downloadPage(url);
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
}
private String extractResumptionToken(final String xml) {
private String extractResumptionToken(final String xml) {
final String s = StringUtils.substringAfter(xml, "<resumptionToken");
if (s == null) { return null; }
final String s = StringUtils.substringAfter(xml, "<resumptionToken");
if (s == null) {
return null;
}
final String result = StringUtils.substringBetween(s, ">", "</");
if (result == null) { return null; }
return result.trim();
final String result = StringUtils.substringBetween(s, ">", "</");
if (result == null) {
return null;
}
return result.trim();
}
}
private String otherPages(final String resumptionToken) throws DnetCollectorException {
try {
return downloadPage(
baseUrl
+ "?verb=ListRecords&resumptionToken="
+ URLEncoder.encode(resumptionToken, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
}
private String otherPages(final String resumptionToken) throws DnetCollectorException {
try {
return downloadPage(baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
}
private String downloadPage(final String url) throws DnetCollectorException {
private String downloadPage(final String url) throws DnetCollectorException {
final String xml = httpConnector.getInputSource(url);
Document doc;
try {
doc = reader.read(new StringReader(xml));
} catch (final DocumentException e) {
log.warn("Error parsing xml, I try to clean it: " + xml, e);
final String cleaned = XmlCleaner.cleanAllEntities(xml);
try {
doc = reader.read(new StringReader(cleaned));
} catch (final DocumentException e1) {
final String resumptionToken = extractResumptionToken(xml);
if (resumptionToken == null) {
throw new DnetCollectorException(
"Error parsing cleaned document:" + cleaned, e1);
}
return resumptionToken;
}
}
final String xml = httpConnector.getInputSource(url);
Document doc;
try {
doc = reader.read(new StringReader(xml));
} catch (final DocumentException e) {
log.warn("Error parsing xml, I try to clean it: " + xml, e);
final String cleaned = XmlCleaner.cleanAllEntities(xml);
try {
doc = reader.read(new StringReader(cleaned));
} catch (final DocumentException e1) {
final String resumptionToken = extractResumptionToken(xml);
if (resumptionToken == null) { throw new DnetCollectorException("Error parsing cleaned document:" + cleaned, e1); }
return resumptionToken;
}
}
final Node errorNode =
doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
if (errorNode != null) {
final String code = errorNode.valueOf("@code");
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
log.warn("noRecordsMatch for oai call: " + url);
return null;
} else {
throw new DnetCollectorException(code + " - " + errorNode.getText());
}
}
final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
if (errorNode != null) {
final String code = errorNode.valueOf("@code");
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
log.warn("noRecordsMatch for oai call: " + url);
return null;
} else {
throw new DnetCollectorException(code + " - " + errorNode.getText());
}
}
for (final Object o :
doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
queue.add(((Node) o).asXML());
}
return doc.valueOf("//*[local-name()='resumptionToken']");
}
}
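For context, the token handling above boils down to two commons-lang substring calls. The following self-contained sketch (the XML fragment and class name are invented for illustration; only commons-lang is assumed on the classpath) shows what extractResumptionToken returns for a typical ListRecords response:

import org.apache.commons.lang.StringUtils;

// Illustrative sketch mirroring extractResumptionToken above; the XML is a made-up example.
public class ResumptionTokenDemo {
    public static void main(final String[] args) {
        final String page =
                "<OAI-PMH><ListRecords>"
                        + "<resumptionToken cursor=\"0\" completeListSize=\"42\"> token-123 </resumptionToken>"
                        + "</ListRecords></OAI-PMH>";
        // everything after the opening tag name, attributes included
        final String s = StringUtils.substringAfter(page, "<resumptionToken");
        // the element text sits between the closing '>' and the next '</'
        final String token = StringUtils.substringBetween(s, ">", "</");
        System.out.println(token.trim()); // prints: token-123
    }
}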

View File

@ -1,25 +1,23 @@
package eu.dnetlib.dhp.collection.plugin.oai;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import java.util.Iterator;
public class OaiIteratorFactory {
private HttpConnector httpConnector;
public Iterator<String> newIterator(
final String baseUrl,
final String mdFormat,
final String set,
final String fromDate,
final String untilDate) {
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
}
private HttpConnector getHttpConnector() {
if (httpConnector == null) httpConnector = new HttpConnector();
return httpConnector;
}
}
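As a hedged usage sketch (the base URL and metadata prefix are placeholders, and null is assumed to be accepted for the optional set and date-range arguments), the factory can be exercised like this; each next() call transparently follows resumption tokens through the shared HttpConnector:

import java.util.Iterator;

// Hypothetical driver, assumed to live alongside OaiIteratorFactory; not a real endpoint.
public class OaiHarvestDemo {
    public static void main(final String[] args) {
        final Iterator<String> records =
                new OaiIteratorFactory()
                        .newIterator("http://repo.example.org/oai", "oai_dc", null, null, null);
        int n = 0;
        while (records.hasNext() && n < 5) {
            System.out.println(records.next()); // one <record> element serialized as XML
            n++;
        }
    }
}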

View File

@ -2,29 +2,30 @@ package eu.dnetlib.dhp.collection.worker;
public class DnetCollectorException extends Exception {
/** */
private static final long serialVersionUID = -290723075076039757L;
public DnetCollectorException() {
super();
}
public DnetCollectorException(
final String message,
final Throwable cause,
final boolean enableSuppression,
final boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
public DnetCollectorException(final String message, final Throwable cause) {
super(message, cause);
}
public DnetCollectorException(final String message) {
super(message);
}
public DnetCollectorException(final Throwable cause) {
super(cause);
}
}

View File

@ -2,13 +2,17 @@ package eu.dnetlib.dhp.collection.worker;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -18,37 +22,34 @@ import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DnetCollectorWorker {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
private final CollectorPluginFactory collectorPluginFactory;
private final ArgumentApplicationParser argumentParser;
private final MessageManager manager;
public DnetCollectorWorker(
final CollectorPluginFactory collectorPluginFactory,
final ArgumentApplicationParser argumentParser,
final MessageManager manager)
throws DnetCollectorException {
this.collectorPluginFactory = collectorPluginFactory;
this.argumentParser = argumentParser;
this.manager = manager;
}
public void collect() throws DnetCollectorException {
try {
final ObjectMapper jsonMapper = new ObjectMapper();
final ApiDescriptor api =
jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
final CollectorPlugin plugin =
collectorPluginFactory.getPluginByProtocol(api.getProtocol());
final String hdfsuri = argumentParser.get("namenode");
@ -62,7 +63,7 @@ public class DnetCollectorWorker {
System.setProperty("HADOOP_USER_NAME", argumentParser.get("userHDFS"));
System.setProperty("hadoop.home.dir", "/");
// Get the filesystem - HDFS
FileSystem.get(URI.create(hdfsuri), conf);
Path hdfswritepath = new Path(argumentParser.get("hdfsPath"));
@ -71,43 +72,69 @@ public class DnetCollectorWorker {
final Map<String, String> ongoingMap = new HashMap<>();
final Map<String, String> reportMap = new HashMap<>();
final AtomicInteger counter = new AtomicInteger(0);
try (SequenceFile.Writer writer =
SequenceFile.createWriter(
conf,
SequenceFile.Writer.file(hdfswritepath),
SequenceFile.Writer.keyClass(IntWritable.class),
SequenceFile.Writer.valueClass(Text.class))) {
final IntWritable key = new IntWritable(counter.get());
final Text value = new Text();
plugin.collect(api)
.forEach(
content -> {
key.set(counter.getAndIncrement());
value.set(content);
if (counter.get() % 10 == 0) {
try {
ongoingMap.put("ongoing", "" + counter.get());
log.debug(
"Sending message: "
+ manager.sendMessage(
new Message(
argumentParser.get(
"workflowId"),
"Collection",
MessageType.ONGOING,
ongoingMap),
argumentParser.get(
"rabbitOngoingQueue"),
true,
false));
} catch (Exception e) {
log.error("Error on sending message ", e);
}
}
try {
writer.append(key, value);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
ongoingMap.put("ongoing", "" + counter.get());
manager.sendMessage(
new Message(
argumentParser.get("workflowId"),
"Collection",
MessageType.ONGOING,
ongoingMap),
argumentParser.get("rabbitOngoingQueue"),
true,
false);
reportMap.put("collected", "" + counter.get());
manager.sendMessage(
new Message(
argumentParser.get("workflowId"),
"Collection",
MessageType.REPORT,
reportMap),
argumentParser.get("rabbitOngoingQueue"),
true,
false);
manager.close();
} catch (Throwable e) {
throw new DnetCollectorException("Error on collecting", e);
}
}
}
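For reference, the records written above land in an HDFS SequenceFile keyed by a running IntWritable counter. A minimal reader sketch (the path and Configuration are placeholders; standard Hadoop 2.x API) would look like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Illustrative reader for the SequenceFile produced by DnetCollectorWorker.collect().
public class CollectedRecordsReader {
    public static void main(final String[] args) throws Exception {
        final Configuration conf = new Configuration(); // placeholder: point at your cluster
        final Path path = new Path("/tmp/collected-records.seq"); // placeholder path
        try (SequenceFile.Reader reader =
                new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
            final IntWritable key = new IntWritable();
            final Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(key.get() + " -> " + value.getLength() + " bytes");
            }
        }
    }
}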

View File

@ -1,6 +1,5 @@
package eu.dnetlib.dhp.collection.worker;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.MessageManager;
@ -8,16 +7,13 @@ import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* DnetCollectorWorkerApplication is the main class responsible for starting the DNet collection
* into HDFS. This module is executed on the Hadoop cluster and takes as input the parameters that
* tell it which collector plugin to use and under which HDFS path to store the data.
*
* @author Sandro La Bruzzo
*/
public class DnetCollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
@ -26,22 +22,27 @@ public class DnetCollectorWorkerApplication {
private static ArgumentApplicationParser argumentParser;
/** @param args */
public static void main(final String[] args) throws Exception {
argumentParser =
new ArgumentApplicationParser(
IOUtils.toString(
DnetCollectorWorker.class.getResourceAsStream(
"/eu/dnetlib/collector/worker/collector_parameter.json")));
argumentParser.parseArgument(args);
log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
log.info("json = " + argumentParser.get("apidescriptor"));
final MessageManager manager =
new MessageManager(
argumentParser.get("rabbitHost"),
argumentParser.get("rabbitUser"),
argumentParser.get("rabbitPassword"),
false,
false,
null);
final DnetCollectorWorker worker =
new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
worker.collect();
}
}

View File

@ -4,16 +4,15 @@ import java.util.LinkedList;
public class CollectorPluginErrorLogList extends LinkedList<String> {
private static final long serialVersionUID = -6925786561303289704L;
@Override
public String toString() {
String log = "";
int index = 0;
for (final String errorMessage : this) {
log += String.format("Retry #%s: %s / ", index++, errorMessage);
}
return log;
}
}
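Its toString() flattens the list into a single retry trail, which is what ends up in the retry-exceeded message thrown by HttpConnector below; a tiny demo:

// Demonstrates the error-trail formatting of CollectorPluginErrorLogList.
public class ErrorLogDemo {
    public static void main(final String[] args) {
        final CollectorPluginErrorLogList errors = new CollectorPluginErrorLogList();
        errors.add("connection timed out");
        errors.add("503 Service Unavailable");
        // prints: Retry #0: connection timed out / Retry #1: 503 Service Unavailable / 
        System.out.println(errors);
    }
}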

View File

@ -4,19 +4,16 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
public class CollectorPluginFactory {
public CollectorPlugin getPluginByProtocol(final String protocol)
throws DnetCollectorException {
if (protocol == null) throw new DnetCollectorException("protocol cannot be null");
switch (protocol.toLowerCase().trim()) {
case "oai":
return new OaiCollectorPlugin();
default:
throw new DnetCollectorException("Unknown protocol");
}
}
}
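Since the lookup lower-cases and trims the protocol before the switch, dispatch is whitespace- and case-insensitive, and anything unrecognized fails fast. A short sketch (assumed to compile alongside the classes above):

// Demonstrates the protocol dispatch of CollectorPluginFactory.
public class PluginFactoryDemo {
    public static void main(final String[] args) throws DnetCollectorException {
        final CollectorPluginFactory factory = new CollectorPluginFactory();
        final CollectorPlugin plugin = factory.getPluginByProtocol("  OAI  ");
        System.out.println(plugin.getClass().getSimpleName()); // OaiCollectorPlugin
        try {
            factory.getPluginByProtocol("ftp");
        } catch (final DnetCollectorException expected) {
            System.out.println(expected.getMessage()); // Unknown protocol
        }
    }
}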

View File

@ -1,15 +1,6 @@
package eu.dnetlib.dhp.collection.worker.utils;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
@ -17,203 +8,243 @@ import java.security.GeneralSecurityException;
import java.security.cert.X509Certificate;
import java.util.List;
import java.util.Map;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class HttpConnector {
private static final Log log = LogFactory.getLog(HttpConnector.class);
private int maxNumberOfRetry = 6;
private int defaultDelay = 120; // seconds
private int readTimeOut = 120; // seconds
private String responseType = null;
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
public HttpConnector() {
CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
}
/**
* Given the URL returns the content via HTTP GET
*
* @param requestUrl the URL
* @return the content of the downloaded resource
* @throws DnetCollectorException when retrying more than maxNumberOfRetry times
*/
public String getInputSource(final String requestUrl) throws DnetCollectorException {
return attemptDownloadAsString(requestUrl, 1, new CollectorPluginErrorLogList());
}
/**
* Given the URL returns the content as a stream via HTTP GET
*
* @param requestUrl the URL
* @return the content of the downloaded resource as InputStream
* @throws DnetCollectorException when retrying more than maxNumberOfRetry times
*/
public InputStream getInputSourceAsStream(final String requestUrl)
throws DnetCollectorException {
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
}
private String attemptDownloadAsString(
final String requestUrl,
final int retryNumber,
final CollectorPluginErrorLogList errorList)
throws DnetCollectorException {
try {
// carry the retry counter and error trail through, so maxNumberOfRetry is enforced
final InputStream s = attemptDownload(requestUrl, retryNumber, errorList);
try {
return IOUtils.toString(s);
} catch (final IOException e) {
log.error("error while retrieving from http-connection occurred: " + requestUrl, e);
Thread.sleep(defaultDelay * 1000);
errorList.add(e.getMessage());
return attemptDownloadAsString(requestUrl, retryNumber + 1, errorList);
} finally {
IOUtils.closeQuietly(s);
}
} catch (final InterruptedException e) {
throw new DnetCollectorException(e);
}
}
private InputStream attemptDownload(
final String requestUrl,
final int retryNumber,
final CollectorPluginErrorLogList errorList)
throws DnetCollectorException {
if (retryNumber > maxNumberOfRetry) {
throw new DnetCollectorException(
"Max number of retries exceeded. Cause: \n " + errorList);
}
log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
try {
InputStream input = null;
try {
final HttpURLConnection urlConn =
(HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setInstanceFollowRedirects(false);
urlConn.setReadTimeout(readTimeOut * 1000);
urlConn.addRequestProperty("User-Agent", userAgent);
if (log.isDebugEnabled()) {
logHeaderFields(urlConn);
}
final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
if (retryAfter > 0
&& urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
log.warn("waiting and repeating request after " + retryAfter + " sec.");
Thread.sleep(retryAfter * 1000);
errorList.add("503 Service Unavailable");
urlConn.disconnect();
return attemptDownload(requestUrl, retryNumber + 1, errorList);
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
|| urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.debug("The requested url has been moved to " + newUrl);
errorList.add(
String.format(
"%s %s. Moved to: %s",
urlConn.getResponseCode(),
urlConn.getResponseMessage(),
newUrl));
urlConn.disconnect();
return attemptDownload(newUrl, retryNumber + 1, errorList);
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
log.error(
String.format(
"HTTP error: %s %s",
urlConn.getResponseCode(), urlConn.getResponseMessage()));
Thread.sleep(defaultDelay * 1000);
errorList.add(
String.format(
"%s %s",
urlConn.getResponseCode(), urlConn.getResponseMessage()));
urlConn.disconnect();
return attemptDownload(requestUrl, retryNumber + 1, errorList);
} else {
input = urlConn.getInputStream();
responseType = urlConn.getContentType();
return input;
}
} catch (final IOException e) {
log.error("error while retrieving from http-connection occurred: " + requestUrl, e);
Thread.sleep(defaultDelay * 1000);
errorList.add(e.getMessage());
return attemptDownload(requestUrl, retryNumber + 1, errorList);
}
} catch (final InterruptedException e) {
throw new DnetCollectorException(e);
}
}
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: " + urlConn.getResponseMessage());
for (final Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) {
for (final String v : e.getValue()) {
log.debug(" key: " + e.getKey() + " - value: " + v);
}
}
}
}
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (final String key : headerMap.keySet()) {
if (key != null
&& key.toLowerCase().equals("retry-after")
&& headerMap.get(key).size() > 0
&& NumberUtils.isNumber(headerMap.get(key).get(0))) {
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
}
}
return -1;
}
private String obtainNewLocation(final Map<String, List<String>> headerMap)
throws DnetCollectorException {
for (final String key : headerMap.keySet()) {
if (key != null
&& key.toLowerCase().equals("location")
&& headerMap.get(key).size() > 0) {
return headerMap.get(key).get(0);
}
}
throw new DnetCollectorException(
"The requested url has been MOVED, but 'location' param is MISSING");
}
/**
* Registers a trust-all manager for the https scheme; this is a workaround and is not intended
* for use in trusted environments.
*/
public void initTrustManager() {
final X509TrustManager tm =
new X509TrustManager() {
@Override
public void checkClientTrusted(
final X509Certificate[] xcs, final String string) {}
@Override
public void checkServerTrusted(
final X509Certificate[] xcs, final String string) {}
@Override
public X509Certificate[] getAcceptedIssuers() {
return null;
}
};
try {
final SSLContext ctx = SSLContext.getInstance("TLS");
ctx.init(null, new TrustManager[] {tm}, null);
HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
} catch (final GeneralSecurityException e) {
log.fatal(e);
throw new IllegalStateException(e);
}
}
public int getMaxNumberOfRetry() {
return maxNumberOfRetry;
}
public void setMaxNumberOfRetry(final int maxNumberOfRetry) {
this.maxNumberOfRetry = maxNumberOfRetry;
}
public int getDefaultDelay() {
return defaultDelay;
}
public void setDefaultDelay(final int defaultDelay) {
this.defaultDelay = defaultDelay;
}
public int getReadTimeOut() {
return readTimeOut;
}
public void setReadTimeOut(final int readTimeOut) {
this.readTimeOut = readTimeOut;
}
public String getResponseType() {
return responseType;
}
}
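Putting the connector together, the intended call pattern is plain construction followed by optional tuning of the retry knobs; the sketch below uses a placeholder URL and arbitrary retry/timeout values:

// Hypothetical usage of HttpConnector; the URL is a placeholder, not a live endpoint.
public class HttpConnectorDemo {
    public static void main(final String[] args) throws DnetCollectorException {
        final HttpConnector connector = new HttpConnector();
        connector.setMaxNumberOfRetry(3); // give up after 3 attempts instead of the default 6
        connector.setReadTimeOut(60); // seconds
        final String body = connector.getInputSource("http://repo.example.org/oai?verb=Identify");
        System.out.println("Content-Type: " + connector.getResponseType());
        System.out.println(body.substring(0, Math.min(80, body.length())));
    }
}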

View File

@ -6,254 +6,392 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/** @author jochen, Andreas Czerniak */
public class XmlCleaner {
/** Pattern for numeric entities. */
private static Pattern validCharacterEntityPattern =
Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
// //$NON-NLS-1$
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11;
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
/**
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD
* | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
private static Pattern invalidCharacterPattern =
Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
// Map entities to their unicode equivalent
private static Set<String> goodEntities = new HashSet<>();
private static Map<String, String> badEntities = new HashMap<>();
static {
// pre-defined XML entities
goodEntities.add("&quot;"); // $NON-NLS-1$ // quotation mark
goodEntities.add("&amp;"); // $NON-NLS-1$ // ampersand
goodEntities.add("&lt;"); // $NON-NLS-1$ // less-than sign
goodEntities.add("&gt;"); // $NON-NLS-1$ // greater-than sign
// control entities
// badEntities.put("&#11;", "");
badEntities.put("&#127;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#128;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#129;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#130;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#131;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#132;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#133;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#134;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#135;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#136;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#137;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#138;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#139;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#140;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#141;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#142;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#143;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#144;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#145;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#146;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#147;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#148;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#149;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#150;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#151;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#152;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#153;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#154;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#155;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#156;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#157;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#158;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#159;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
// misc entities
badEntities.put("&euro;", "\u20AC"); // $NON-NLS-1$ //$NON-NLS-2$ // euro
badEntities.put(
"&lsquo;", "\u2018"); // $NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
badEntities.put(
"&rsquo;", "\u2019"); // $NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
// Latin 1 entities
badEntities.put("&nbsp;", "\u00A0"); // $NON-NLS-1$ //$NON-NLS-2$ // no-break space
badEntities.put(
"&iexcl;", "\u00A1"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
badEntities.put("&cent;", "\u00A2"); // $NON-NLS-1$ //$NON-NLS-2$ // cent sign
badEntities.put("&pound;", "\u00A3"); // $NON-NLS-1$ //$NON-NLS-2$ // pound sign
badEntities.put("&curren;", "\u00A4"); // $NON-NLS-1$ //$NON-NLS-2$ // currency sign
badEntities.put("&yen;", "\u00A5"); // $NON-NLS-1$ //$NON-NLS-2$ // yen sign
badEntities.put("&brvbar;", "\u00A6"); // $NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
badEntities.put("&sect;", "\u00A7"); // $NON-NLS-1$ //$NON-NLS-2$ // section sign
badEntities.put("&uml;", "\u00A8"); // $NON-NLS-1$ //$NON-NLS-2$ // diaeresis
badEntities.put("&copy;", "\u00A9"); // $NON-NLS-1$ //$NON-NLS-2$ // copyright sign
badEntities.put(
"&ordf;", "\u00AA"); // $NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
badEntities.put(
"&laquo;",
"\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
badEntities.put("&not;", "\u00AC"); // $NON-NLS-1$ //$NON-NLS-2$ // not sign
badEntities.put("&shy;", "\u00AD"); // $NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
badEntities.put("&reg;", "\u00AE"); // $NON-NLS-1$ //$NON-NLS-2$ // registered sign
badEntities.put("&macr;", "\u00AF"); // $NON-NLS-1$ //$NON-NLS-2$ // macron
badEntities.put("&deg;", "\u00B0"); // $NON-NLS-1$ //$NON-NLS-2$ // degree sign
badEntities.put("&plusmn;", "\u00B1"); // $NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
badEntities.put("&sup2;", "\u00B2"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript two
badEntities.put("&sup3;", "\u00B3"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript three
badEntities.put("&acute;", "\u00B4"); // $NON-NLS-1$ //$NON-NLS-2$ // acute accent
badEntities.put("&micro;", "\u00B5"); // $NON-NLS-1$ //$NON-NLS-2$ // micro sign
badEntities.put("&para;", "\u00B6"); // $NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
badEntities.put("&middot;", "\u00B7"); // $NON-NLS-1$ //$NON-NLS-2$ // middle dot
badEntities.put("&cedil;", "\u00B8"); // $NON-NLS-1$ //$NON-NLS-2$ // cedilla
badEntities.put("&sup1;", "\u00B9"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript one
badEntities.put(
"&ordm;", "\u00BA"); // $NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
badEntities.put(
"&raquo;",
"\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
badEntities.put(
"&frac14;", "\u00BC"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
badEntities.put(
"&frac12;", "\u00BD"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
badEntities.put(
"&frac34;",
"\u00BE"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
badEntities.put(
"&iquest;", "\u00BF"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
badEntities.put(
"&Agrave;",
"\u00C0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
badEntities.put(
"&Aacute;",
"\u00C1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
badEntities.put(
"&Acirc;",
"\u00C2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
badEntities.put(
"&Atilde;",
"\u00C3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
badEntities.put(
"&Auml;",
"\u00C4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
badEntities.put(
"&Aring;",
"\u00C5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
badEntities.put(
"&AElig;", "\u00C6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
badEntities.put(
"&Ccedil;",
"\u00C7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
badEntities.put(
"&Egrave;",
"\u00C8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
badEntities.put(
"&Eacute;",
"\u00C9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
badEntities.put(
"&Ecirc;",
"\u00CA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
badEntities.put(
"&Euml;",
"\u00CB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
badEntities.put(
"&Igrave;",
"\u00CC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
badEntities.put(
"&Iacute;",
"\u00CD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
badEntities.put(
"&Icirc;",
"\u00CE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
badEntities.put(
"&Iuml;",
"\u00CF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
badEntities.put("&ETH;", "\u00D0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
badEntities.put(
"&Ntilde;",
"\u00D1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
badEntities.put(
"&Ograve;",
"\u00D2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
badEntities.put(
"&Oacute;",
"\u00D3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
badEntities.put(
"&Ocirc;",
"\u00D4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
badEntities.put(
"&Otilde;",
"\u00D5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
badEntities.put(
"&Ouml;",
"\u00D6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
badEntities.put("&times;", "\u00D7"); // $NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
badEntities.put(
"&Oslash;",
"\u00D8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
badEntities.put(
"&Ugrave;",
"\u00D9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
badEntities.put(
"&Uacute;",
"\u00DA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
badEntities.put(
"&Ucirc;",
"\u00DB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
badEntities.put(
"&Uuml;",
"\u00DC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
badEntities.put(
"&Yacute;",
"\u00DD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
badEntities.put(
"&THORN;", "\u00DE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
badEntities.put(
"&szlig;", "\u00DF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
badEntities.put(
"&agrave;",
"\u00E0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
badEntities.put(
"&aacute;",
"\u00E1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
badEntities.put(
"&acirc;",
"\u00E2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
badEntities.put(
"&atilde;",
"\u00E3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
badEntities.put(
"&auml;",
"\u00E4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
badEntities.put(
"&aring;",
"\u00E5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
badEntities.put("&aelig;", "\u00E6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
badEntities.put(
"&ccedil;",
"\u00E7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
badEntities.put(
"&egrave;",
"\u00E8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
badEntities.put(
"&eacute;",
"\u00E9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
badEntities.put(
"&ecirc;",
"\u00EA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
badEntities.put(
"&euml;",
"\u00EB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
badEntities.put(
"&igrave;",
"\u00EC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
badEntities.put(
"&iacute;",
"\u00ED"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
badEntities.put(
"&icirc;",
"\u00EE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
badEntities.put(
"&iuml;",
"\u00EF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
badEntities.put("&eth;", "\u00F0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
badEntities.put(
"&ntilde;",
"\u00F1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
badEntities.put(
"&ograve;",
"\u00F2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
badEntities.put(
"&oacute;",
"\u00F3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
badEntities.put(
"&ocirc;",
"\u00F4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
badEntities.put(
"&otilde;",
"\u00F5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
badEntities.put(
"&ouml;",
"\u00F6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
badEntities.put("&divide;", "\u00F7"); // $NON-NLS-1$ //$NON-NLS-2$ // division sign
badEntities.put(
"&oslash;",
"\u00F8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
badEntities.put(
"&ugrave;",
"\u00F9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
badEntities.put(
"&uacute;",
"\u00FA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
badEntities.put(
"&ucirc;",
"\u00FB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
badEntities.put(
"&uuml;",
"\u00FC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
badEntities.put(
"&yacute;",
"\u00FD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
badEntities.put(
"&thorn;", "\u00FE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
badEntities.put(
"&yuml;",
"\u00FF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
}
/**
* For each entity in the input that is not allowed in XML, replace the entity with its unicode
* equivalent or remove it. For each instance of a bare {@literal &}, replace it with {@literal
* &amp;<br/> } XML only allows 4 entities: {@literal &amp;amp;}, {@literal &amp;quot;},
* {@literal &amp;lt;} and {@literal &amp;gt;}.
*
* @param broken the string to handle entities
* @return the string with entities appropriately fixed up
*/
public static String cleanAllEntities(final String broken) {
if (broken == null) {
return null;
}
String working = invalidControlCharPattern.matcher(broken).replaceAll("");
working = invalidCharacterPattern.matcher(working).replaceAll("");
int cleanfrom = 0;
while (true) {
int amp = working.indexOf('&', cleanfrom);
// If there are no more amps then we are done
if (amp == -1) {
break;
}
// Skip references of the kind &#ddd;
if (validCharacterEntityPattern.matcher(working.substring(amp)).find()) {
cleanfrom = working.indexOf(';', amp) + 1;
continue;
}
int i = amp + 1;
while (true) {
// if we are at the end of the string then just escape the '&';
if (i >= working.length()) {
return working.substring(0, amp)
+ "&amp;"
+ working.substring(amp + 1); // $NON-NLS-1$
}
// if we have come to a ; then we have an entity
// If it is something that xml can't handle then replace it.
final char c = working.charAt(i);
if (c == ';') {
final String entity = working.substring(amp, i + 1);
final String replace = handleEntity(entity);
working = working.substring(0, amp) + replace + working.substring(i + 1);
break;
}
// Did we end an entity without finding a closing ;
// Then treat it as an '&' that needs to be replaced with &amp;
if (!Character.isLetterOrDigit(c)) {
working =
working.substring(0, amp)
+ "&amp;"
+ working.substring(amp + 1); // $NON-NLS-1$
amp = i + 4; // account for the 4 extra characters
break;
}
i++;
}
cleanfrom = amp + 1;
}
if (Pattern.compile("<<").matcher(working).find()) {
working = working.replaceAll("<<", "&lt;&lt;");
}
if (Pattern.compile("<<").matcher(working).find()) {
working = working.replaceAll("<<", "&lt;&lt;");
}
if (Pattern.compile(">>").matcher(working).find()) {
working = working.replaceAll(">>", "&gt;&gt;");
}
if (Pattern.compile(">>").matcher(working).find()) {
working = working.replaceAll(">>", "&gt;&gt;");
}
return working;
}
return working;
}
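To make the intended behavior concrete, a small usage sketch follows; the input and expected output are illustrative and assume goodEntities holds the four legal XML entities while badEntities maps &uuml; as in the table above:

    // "&amp;" is a legal XML entity and survives untouched; "&uuml;" is looked
    // up in badEntities and becomes \u00FC; the trailing bare '&' is escaped.
    String fixed = cleanAllEntities("M&uuml;ller &amp; sons & co");
    // expected: "M\u00FCller &amp; sons &amp; co"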
/**
 * Replace an entity with its unicode equivalent if it is not a valid XML entity; valid entities are kept as-is and
 * unknown ones are stripped. XML only allows 4 entities: &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;.
*
* @param entity
* the entity to be replaced
* @return the substitution for the entity, either itself, the unicode equivalent or an empty string.
*/
private static String handleEntity(final String entity) {
if (goodEntities.contains(entity)) { return entity; }
/**
 * Replace an entity with its unicode equivalent if it is not a valid XML entity; valid entities
 * are kept as-is and unknown ones are stripped. XML only allows 4 entities: &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;.
*
* @param entity the entity to be replaced
* @return the substitution for the entity, either itself, the unicode equivalent or an empty
* string.
*/
private static String handleEntity(final String entity) {
if (goodEntities.contains(entity)) {
return entity;
}
final String replace = badEntities.get(entity);
if (replace != null) { return replace; }
final String replace = badEntities.get(entity);
if (replace != null) {
return replace;
}
return ""; // replace is known to be null at this point: strip the entity
}
return ""; // replace is known to be null at this point: strip the entity
}
}
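For context, the two lookup tables used by handleEntity are presumably a set holding the four entities XML allows plus a map from HTML entity to Unicode replacement. Only the badEntities.put calls are visible in this hunk, so the declarations below are an assumption, not the actual source:

    // Hypothetical declarations consistent with the usage above.
    private static final Set<String> goodEntities =
            new HashSet<>(Arrays.asList("&amp;", "&quot;", "&lt;", "&gt;"));
    private static final Map<String, String> badEntities = new HashMap<>();

    static {
        badEntities.put("&euro;", "\u20AC"); // euro sign (illustrative entry)
    }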

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.migration.actions;
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
import java.util.Comparator;
public class LicenseComparator implements Comparator<Qualifier> {
@ -45,5 +44,4 @@ public class LicenseComparator implements Comparator<Qualifier> {
// Else (but unlikely), lexicographical ordering will do.
return lClass.compareTo(rClass);
}
}
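The comparator is exercised later in this changeset (ProtoConverter.getBestAccessRights): the most open access right is selected by taking the minimum under this ordering, as in the sketch below.

    // instanceList as in ProtoConverter.getBestAccessRights further down.
    Optional<FieldTypeProtos.Qualifier> best =
            instanceList.stream()
                    .map(i -> i.getAccessright())
                    .min(new LicenseComparator());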

View File

@ -6,6 +6,11 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
@ -17,12 +22,6 @@ import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.util.ToolRunner;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;
public class MigrateActionSet {
private static final Log log = LogFactory.getLog(MigrateActionSet.class);
@ -34,9 +33,11 @@ public class MigrateActionSet {
private static Boolean DEFAULT_TRANSFORM_ONLY = false;
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
parser.parseArgument(args);
new MigrateActionSet().run(parser);
@ -47,7 +48,7 @@ public class MigrateActionSet {
final String isLookupUrl = parser.get("isLookupUrl");
final String sourceNN = parser.get("sourceNameNode");
final String targetNN = parser.get("targetNameNode");
final String workDir = parser.get("workingDirectory");
final String workDir = parser.get("workingDirectory");
final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
final String distcp_memory_mb = parser.get("distcp_memory_mb");
@ -63,10 +64,12 @@ public class MigrateActionSet {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
Configuration conf =
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
FileSystem targetFS = FileSystem.get(conf);
Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
Configuration sourceConf =
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
FileSystem sourceFS = FileSystem.get(sourceConf);
@ -75,14 +78,20 @@ public class MigrateActionSet {
List<Path> targetPaths = new ArrayList<>();
final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
log.info(String.format("paths to process:\n%s", sourcePaths.stream().map(p -> p.toString()).collect(Collectors.joining("\n"))));
for(Path source : sourcePaths) {
log.info(
String.format(
"paths to process:\n%s",
sourcePaths.stream()
.map(p -> p.toString())
.collect(Collectors.joining("\n"))));
for (Path source : sourcePaths) {
if (!sourceFS.exists(source)) {
log.warn(String.format("skipping unexisting path: %s", source));
} else {
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
LinkedList<String> pathQ =
Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
final String rawSet = pathQ.pollLast();
log.info(String.format("got RAWSET: %s", rawSet));
@ -91,7 +100,14 @@ public class MigrateActionSet {
final String actionSetDirectory = pathQ.pollLast();
final Path targetPath = new Path(targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);
final Path targetPath =
new Path(
targetNN
+ workDir
+ SEPARATOR
+ actionSetDirectory
+ SEPARATOR
+ rawSet);
log.info(String.format("using TARGET PATH: %s", targetPath));
@ -99,7 +115,13 @@ public class MigrateActionSet {
if (targetFS.exists(targetPath)) {
targetFS.delete(targetPath, true);
}
runDistcp(distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
runDistcp(
distcp_num_maps,
distcp_memory_mb,
distcp_task_timeout,
conf,
source,
targetPath);
}
targetPaths.add(targetPath);
@ -107,19 +129,25 @@ public class MigrateActionSet {
}
}
props.setProperty(TARGET_PATHS, targetPaths
.stream()
.map(p -> p.toString())
.collect(Collectors.joining(",")));
props.setProperty(
TARGET_PATHS,
targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(",")));
File file = new File(System.getProperty("oozie.action.output.properties"));
try(OutputStream os = new FileOutputStream(file)) {
try (OutputStream os = new FileOutputStream(file)) {
props.store(os, "");
}
System.out.println(file.getAbsolutePath());
}
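The file written above is the standard Oozie capture-output handoff. As a sketch of the consuming side, a follow-up step could reload the emitted list as below; TARGET_PATHS is the constant used above, and the usual java.io and java.util imports are assumed:

    // Reload the properties emitted by the migration step (sketch).
    Properties props = new Properties();
    try (InputStream is =
            new FileInputStream(System.getProperty("oozie.action.output.properties"))) {
        props.load(is);
    }
    // The paths were joined with "," above, so split them back apart.
    String[] targetPaths = props.getProperty(TARGET_PATHS).split(",");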
private void runDistcp(Integer distcp_num_maps, String distcp_memory_mb, String distcp_task_timeout, Configuration conf, Path source, Path targetPath) throws Exception {
private void runDistcp(
Integer distcp_num_maps,
String distcp_memory_mb,
String distcp_task_timeout,
Configuration conf,
Path source,
Path targetPath)
throws Exception {
final DistCpOptions op = new DistCpOptions(source, targetPath);
op.setMaxMaps(distcp_num_maps);
@ -127,20 +155,25 @@ public class MigrateActionSet {
op.preserve(DistCpOptions.FileAttribute.REPLICATION);
op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
int res = ToolRunner.run(new DistCp(conf, op), new String[]{
"-Dmapred.task.timeout=" + distcp_task_timeout,
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
"-pb",
"-m " + distcp_num_maps,
source.toString(),
targetPath.toString()});
int res =
ToolRunner.run(
new DistCp(conf, op),
new String[] {
"-Dmapred.task.timeout=" + distcp_task_timeout,
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
"-pb",
"-m " + distcp_num_maps,
source.toString(),
targetPath.toString()
});
if (res != 0) {
throw new RuntimeException(String.format("distcp exited with code %s", res));
}
}
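For reference, the command-line flags passed above mirror standard DistCp options: the -D properties tune the task timeout and map memory, -pb preserves block size, and -m caps the number of map tasks. A hedged sketch of the same intent expressed through the options object alone:

    // Sketch only; the actual code also repeats these settings on the command line.
    DistCpOptions options = new DistCpOptions(source, targetPath);
    options.setMaxMaps(distcp_num_maps);
    options.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);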
private Configuration getConfiguration(String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
private Configuration getConfiguration(
String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
final Configuration conf = new Configuration();
conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
@ -151,20 +184,20 @@ public class MigrateActionSet {
return conf;
}
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp) throws ISLookUpException {
String XQUERY = "distinct-values(\n" +
"let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n" +
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n" +
"let $setDir := $x//SET/@directory/string()\n" +
"let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n" +
"return concat($basePath, '/', $setDir, '/', $rawSet))";
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
throws ISLookUpException {
String XQUERY =
"distinct-values(\n"
+ "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
+ "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
+ "let $setDir := $x//SET/@directory/string()\n"
+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
log.info(String.format("running xquery:\n%s", XQUERY));
return isLookUp.quickSearchProfile(XQUERY)
.stream()
return isLookUp.quickSearchProfile(XQUERY).stream()
.map(p -> sourceNN + p)
.map(Path::new)
.collect(Collectors.toList());
}
}
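To illustrate what getSourcePaths yields: each XQuery result is a profile path of the form basePath/setDir/rawSet, which is then prefixed with the source NameNode URI. The values below are made up:

    // Illustrative only; mirrors the map chain in getSourcePaths.
    String sourceNN = "hdfs://source-nn:8020";                  // assumed NameNode URI
    String profilePath = "/var/actionsets/promote/rawset_1234"; // assumed XQuery result
    Path source = new Path(sourceNN + profilePath);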

View File

@ -4,12 +4,11 @@ import com.google.common.collect.Lists;
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.*;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
public class ProtoConverter implements Serializable {
@ -42,10 +41,12 @@ public class ProtoConverter implements Serializable {
rel.setRelType(r.getRelType().toString());
rel.setSubRelType(r.getSubRelType().toString());
rel.setRelClass(r.getRelClass());
rel.setCollectedFrom(r.getCollectedfromCount() > 0 ?
r.getCollectedfromList().stream()
.map(kv -> mapKV(kv))
.collect(Collectors.toList()) : null);
rel.setCollectedfrom(
r.getCollectedfromCount() > 0
? r.getCollectedfromList().stream()
.map(kv -> mapKV(kv))
.collect(Collectors.toList())
: null);
return rel;
}
@ -71,8 +72,7 @@ public class ProtoConverter implements Serializable {
final ResultProtos.Result r = oaf.getEntity().getResult();
if (r.getInstanceCount() > 0) {
return r.getInstanceList()
.stream()
return r.getInstanceList().stream()
.map(i -> convertInstance(i))
.collect(Collectors.toList());
}
@ -96,15 +96,16 @@ public class ProtoConverter implements Serializable {
}
private static Organization convertOrganization(OafProtos.Oaf oaf) {
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
final OrganizationProtos.Organization.Metadata m =
oaf.getEntity().getOrganization().getMetadata();
final Organization org = setOaf(new Organization(), oaf);
setEntity(org, oaf);
org.setLegalshortname(mapStringField(m.getLegalshortname()));
org.setLegalname(mapStringField(m.getLegalname()));
org.setAlternativeNames(m.getAlternativeNamesList().
stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
org.setAlternativeNames(
m.getAlternativeNamesList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
org.setLogourl(mapStringField(m.getLogourl()));
org.setEclegalbody(mapStringField(m.getEclegalbody()));
@ -112,7 +113,8 @@ public class ProtoConverter implements Serializable {
org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
org.setEchighereducation(mapStringField(m.getEchighereducation()));
org.setEcinternationalorganizationeurinterests(mapStringField(m.getEcinternationalorganizationeurinterests()));
org.setEcinternationalorganizationeurinterests(
mapStringField(m.getEcinternationalorganizationeurinterests()));
org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
org.setEcenterprise(mapStringField(m.getEcenterprise()));
org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
@ -123,13 +125,14 @@ public class ProtoConverter implements Serializable {
}
private static Datasource convertDataSource(OafProtos.Oaf oaf) {
final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
final DatasourceProtos.Datasource.Metadata m =
oaf.getEntity().getDatasource().getMetadata();
final Datasource datasource = setOaf(new Datasource(), oaf);
setEntity(datasource, oaf);
datasource.setAccessinfopackage(m.getAccessinfopackageList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setAccessinfopackage(
m.getAccessinfopackageList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setCertificates(mapStringField(m.getCertificates()));
datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
datasource.setContactemail(mapStringField(m.getContactemail()));
@ -148,37 +151,36 @@ public class ProtoConverter implements Serializable {
datasource.setLogourl(mapStringField(m.getLogourl()));
datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
datasource.setOdcontenttypes(m.getOdcontenttypesList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setOdlanguages(m.getOdlanguagesList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setOdcontenttypes(
m.getOdcontenttypesList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setOdlanguages(
m.getOdlanguagesList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate()));
datasource.setOdpolicies(mapStringField(m.getOdpolicies()));
datasource.setOfficialname(mapStringField(m.getOfficialname()));
datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
datasource.setPidsystems(mapStringField(m.getPidsystems()));
datasource.setPolicies(m.getPoliciesList()
.stream()
.map(ProtoConverter::mapKV)
.collect(Collectors.toList()));
datasource.setPolicies(
m.getPoliciesList().stream()
.map(ProtoConverter::mapKV)
.collect(Collectors.toList()));
datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
datasource.setSubjects(m.getSubjectsList()
.stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
datasource.setSubjects(
m.getSubjectsList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
datasource.setVersioning(mapBoolField(m.getVersioning()));
datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
datasource.setJournal(mapJournal(m.getJournal()));
return datasource;
}
@ -204,14 +206,16 @@ public class ProtoConverter implements Serializable {
project.setFundedamount(m.getFundedamount());
project.setTotalcost(m.getTotalcost());
project.setKeywords(mapStringField(m.getKeywords()));
project.setSubjects(m.getSubjectsList().stream()
.map(sp -> mapStructuredProperty(sp))
.collect(Collectors.toList()));
project.setSubjects(
m.getSubjectsList().stream()
.map(sp -> mapStructuredProperty(sp))
.collect(Collectors.toList()));
project.setTitle(mapStringField(m.getTitle()));
project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
project.setFundingtree(m.getFundingtreeList().stream()
.map(f -> mapStringField(f))
.collect(Collectors.toList()));
project.setFundingtree(
m.getFundingtreeList().stream()
.map(f -> mapStringField(f))
.collect(Collectors.toList()));
project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
project.setSummary(mapStringField(m.getSummary()));
project.setOptional1(mapStringField(m.getOptional1()));
@ -242,14 +246,14 @@ public class ProtoConverter implements Serializable {
setEntity(software, oaf);
setResult(software, oaf);
software.setDocumentationUrl(m.getDocumentationUrlList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
software.setLicense(m.getLicenseList()
.stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
software.setDocumentationUrl(
m.getDocumentationUrlList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
software.setLicense(
m.getLicenseList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage()));
return software;
@ -260,18 +264,18 @@ public class ProtoConverter implements Serializable {
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
setEntity(otherResearchProducts, oaf);
setResult(otherResearchProducts, oaf);
otherResearchProducts.setContactperson(m.getContactpersonList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setContactgroup(m.getContactgroupList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setTool(m.getToolList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setContactperson(
m.getContactpersonList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setContactgroup(
m.getContactgroupList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setTool(
m.getToolList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
return otherResearchProducts;
}
@ -298,12 +302,11 @@ public class ProtoConverter implements Serializable {
dataset.setVersion(mapStringField(m.getVersion()));
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
dataset.setGeolocation(m.getGeolocationList()
.stream()
.map(ProtoConverter::mapGeolocation)
.collect(Collectors.toList()));
dataset.setGeolocation(
m.getGeolocationList().stream()
.map(ProtoConverter::mapGeolocation)
.collect(Collectors.toList()));
return dataset;
}
public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
@ -313,100 +316,103 @@ public class ProtoConverter implements Serializable {
}
public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
//setting Entity fields
// setting Entity fields
final OafProtos.OafEntity e = oaf.getEntity();
entity.setId(e.getId());
entity.setOriginalId(e.getOriginalIdList());
entity.setCollectedfrom(e.getCollectedfromList()
.stream()
.map(ProtoConverter::mapKV)
.collect(Collectors.toList()));
entity.setPid(e.getPidList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setCollectedfrom(
e.getCollectedfromList().stream()
.map(ProtoConverter::mapKV)
.collect(Collectors.toList()));
entity.setPid(
e.getPidList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setDateofcollection(e.getDateofcollection());
entity.setDateoftransformation(e.getDateoftransformation());
entity.setExtraInfo(e.getExtraInfoList()
.stream()
.map(ProtoConverter::mapExtraInfo)
.collect(Collectors.toList()));
entity.setExtraInfo(
e.getExtraInfoList().stream()
.map(ProtoConverter::mapExtraInfo)
.collect(Collectors.toList()));
return entity;
}
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
//setting Result fields
// setting Result fields
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
entity.setAuthor(m.getAuthorList()
.stream()
.map(ProtoConverter::mapAuthor)
.collect(Collectors.toList()));
entity.setAuthor(
m.getAuthorList().stream()
.map(ProtoConverter::mapAuthor)
.collect(Collectors.toList()));
entity.setResulttype(mapQualifier(m.getResulttype()));
entity.setLanguage(mapQualifier(m.getLanguage()));
entity.setCountry(m.getCountryList()
.stream()
.map(ProtoConverter::mapQualifierAsCountry)
.collect(Collectors.toList()));
entity.setSubject(m.getSubjectList()
.stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setTitle(m.getTitleList()
.stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setRelevantdate(m.getRelevantdateList()
.stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setDescription(m.getDescriptionList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setCountry(
m.getCountryList().stream()
.map(ProtoConverter::mapQualifierAsCountry)
.collect(Collectors.toList()));
entity.setSubject(
m.getSubjectList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setTitle(
m.getTitleList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setRelevantdate(
m.getRelevantdateList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setDescription(
m.getDescriptionList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
entity.setPublisher(mapStringField(m.getPublisher()));
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
entity.setSource(m.getSourceList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setFulltext(m.getFulltextList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setFormat(m.getFormatList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setContributor(m.getContributorList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setSource(
m.getSourceList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setFulltext(
m.getFulltextList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setFormat(
m.getFormatList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setContributor(
m.getContributorList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setResourcetype(mapQualifier(m.getResourcetype()));
entity.setCoverage(m.getCoverageList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setContext(m.getContextList()
.stream()
.map(ProtoConverter::mapContext)
.collect(Collectors.toList()));
entity.setCoverage(
m.getCoverageList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setContext(
m.getContextList().stream()
.map(ProtoConverter::mapContext)
.collect(Collectors.toList()));
entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
entity.setBestaccessright(
getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
return entity;
}
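Downstream, this converter is driven from TransformActions (see doTransform further below); the essential call pattern, with the byte payload taken from a legacy AtomicAction:

    // Parse the action payload as a protobuf Oaf, then map it onto the dhp schema.
    OafProtos.Oaf protoOaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
    Oaf converted = ProtoConverter.convert(protoOaf);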
private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
if (instanceList != null) {
final Optional<FieldTypeProtos.Qualifier> min = instanceList.stream()
.map(i -> i.getAccessright()).min(new LicenseComparator());
final Optional<FieldTypeProtos.Qualifier> min =
instanceList.stream().map(i -> i.getAccessright()).min(new LicenseComparator());
final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN);
}
if (StringUtils.isBlank(rights.getClassname()) || UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
if (StringUtils.isBlank(rights.getClassname())
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
rights.setClassname(NOT_AVAILABLE);
}
if (StringUtils.isBlank(rights.getSchemeid())) {
@ -425,14 +431,13 @@ public class ProtoConverter implements Serializable {
final Context entity = new Context();
entity.setId(context.getId());
entity.setDataInfo(context.getDataInfoList()
.stream()
.map(ProtoConverter::mapDataInfo)
.collect(Collectors.toList()));
entity.setDataInfo(
context.getDataInfoList().stream()
.map(ProtoConverter::mapDataInfo)
.collect(Collectors.toList()));
return entity;
}
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
final KeyValue keyValue = new KeyValue();
keyValue.setKey(kv.getKey());
@ -495,7 +500,8 @@ public class ProtoConverter implements Serializable {
return entity;
}
public static OriginDescription mapOriginalDescription(FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
public static OriginDescription mapOriginalDescription(
FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
final OriginDescription originDescriptionResult = new OriginDescription();
originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
originDescriptionResult.setAltered(originDescription.getAltered());
@ -550,24 +556,24 @@ public class ProtoConverter implements Serializable {
entity.setName(author.getName());
entity.setSurname(author.getSurname());
entity.setRank(author.getRank());
entity.setPid(author.getPidList()
.stream()
.map(kv -> {
final StructuredProperty sp = new StructuredProperty();
sp.setValue(kv.getValue());
final Qualifier q = new Qualifier();
q.setClassid(kv.getKey());
q.setClassname(kv.getKey());
sp.setQualifier(q);
return sp;
})
.collect(Collectors.toList()));
entity.setAffiliation(author.getAffiliationList()
.stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setPid(
author.getPidList().stream()
.map(
kv -> {
final StructuredProperty sp = new StructuredProperty();
sp.setValue(kv.getValue());
final Qualifier q = new Qualifier();
q.setClassid(kv.getKey());
q.setClassname(kv.getKey());
sp.setQualifier(q);
return sp;
})
.collect(Collectors.toList()));
entity.setAffiliation(
author.getAffiliationList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
return entity;
}
public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {

View File

@ -5,14 +5,17 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.io.IOException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
@ -21,16 +24,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.io.IOException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.Objects;
public class TransformActions implements Serializable {
@ -38,9 +34,11 @@ public class TransformActions implements Serializable {
private static final String SEPARATOR = "/";
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
parser.parseArgument(args);
new TransformActions().run(parser);
@ -51,7 +49,7 @@ public class TransformActions implements Serializable {
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: " + isLookupUrl);
final String inputPaths = parser.get("inputPaths");
final String inputPaths = parser.get("inputPaths");
if (StringUtils.isBlank(inputPaths)) {
throw new RuntimeException("empty inputPaths");
@ -60,18 +58,25 @@ public class TransformActions implements Serializable {
final String targetBaseDir = getTargetBaseDir(isLookupUrl);
try(SparkSession spark = getSparkSession(parser)) {
try (SparkSession spark = getSparkSession(parser)) {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
for(String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
LinkedList<String> pathQ =
Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
final String rawset = pathQ.pollLast();
final String actionSetDirectory = pathQ.pollLast();
final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);
final Path targetDirectory =
new Path(
targetBaseDir
+ SEPARATOR
+ actionSetDirectory
+ SEPARATOR
+ rawset);
if (fs.exists(targetDirectory)) {
log.info(String.format("found target directory '%s", targetDirectory));
@ -79,20 +84,27 @@ public class TransformActions implements Serializable {
log.info(String.format("deleted target directory '%s", targetDirectory));
}
log.info(String.format("transforming actions from '%s' to '%s'", sourcePath, targetDirectory));
log.info(
String.format(
"transforming actions from '%s' to '%s'",
sourcePath, targetDirectory));
sc.sequenceFile(sourcePath, Text.class, Text.class)
.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString()))
.map(a -> doTransform(a))
.filter(Objects::isNull)
.filter(a -> a.getPayload() == null)
.map(a -> new ObjectMapper().writeValueAsString(a))
.saveAsTextFile(targetDirectory.toString(), GzipCodec.class);
.map(
a ->
eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(
a._2().toString()))
.map(a -> doTransform(a))
.filter(Objects::nonNull) // keep only successfully transformed actions
.filter(a -> a.getPayload() != null)
.map(a -> new ObjectMapper().writeValueAsString(a))
.saveAsTextFile(targetDirectory.toString(), GzipCodec.class);
}
}
}
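For clarity, the per-record work done inside the pipeline above, shown outside Spark; value stands for the Text payload of one sequence-file record and is an assumption of this sketch:

    // Deserialize the legacy action, transform it, and re-serialize as JSON.
    eu.dnetlib.actionmanager.actions.AtomicAction legacy =
            eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(value.toString());
    AtomicAction transformed = doTransform(legacy);
    String json = new ObjectMapper().writeValueAsString(transformed);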
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException {
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa)
throws InvalidProtocolBufferException, JsonProcessingException {
final Text out = new Text();
final ObjectMapper mapper = new ObjectMapper();
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
@ -135,7 +147,8 @@ public class TransformActions implements Serializable {
return new AtomicAction<>(Relation.class, rel);
}
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException {
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
throws InvalidProtocolBufferException {
final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
final Oaf oaf = ProtoConverter.convert(proto_oaf);
switch (proto_oaf.getKind()) {
@ -148,14 +161,21 @@ public class TransformActions implements Serializable {
case project:
return new AtomicAction<>(Project.class, (Project) oaf);
case result:
final String resulttypeid = proto_oaf.getEntity().getResult().getMetadata().getResulttype().getClassid();
final String resulttypeid =
proto_oaf
.getEntity()
.getResult()
.getMetadata()
.getResulttype()
.getClassid();
switch (resulttypeid) {
case "publication":
return new AtomicAction<>(Publication.class, (Publication) oaf);
case "software":
return new AtomicAction<>(Software.class, (Software) oaf);
case "other":
return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf);
return new AtomicAction<>(
OtherResearchProduct.class, (OtherResearchProduct) oaf);
case "dataset":
return new AtomicAction<>(Dataset.class, (Dataset) oaf);
default:
@ -163,7 +183,8 @@ public class TransformActions implements Serializable {
return new AtomicAction<>(Result.class, (Result) oaf);
}
default:
throw new IllegalArgumentException("invalid entity type: " + proto_oaf.getEntity().getType());
throw new IllegalArgumentException(
"invalid entity type: " + proto_oaf.getEntity().getType());
}
case relation:
return new AtomicAction<>(Relation.class, (Relation) oaf);
@ -174,15 +195,15 @@ public class TransformActions implements Serializable {
private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
String XQUERY =
"collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
return isLookUp.getResourceProfileByQuery(XQUERY);
}
private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
SparkConf conf = new SparkConf();
return SparkSession
.builder()
return SparkSession.builder()
.appName(TransformActions.class.getSimpleName())
.master(parser.get("master"))
.config(conf)

Some files were not shown because too many files have changed in this diff.