1
0
Fork 0

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Miriam Baglioni 2020-04-18 14:09:44 +02:00
commit 454b8a6a29
237 changed files with 14978 additions and 9912 deletions

View File

@ -3,7 +3,6 @@ package eu.dnetlib.maven.plugin.properties;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo;
@ -12,8 +11,8 @@ import org.apache.maven.plugin.MojoFailureException;
/**
* Generates oozie properties which were not provided from commandline.
* @author mhorst
*
* @author mhorst
* @goal generate-properties
*/
public class GenerateOoziePropertiesMojo extends AbstractMojo {
@ -25,39 +24,43 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
@Override
public void execute() throws MojoExecutionException, MojoFailureException {
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) &&
!System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName = generateSandboxName(System.getProperties().getProperty(
PROPERTY_NAME_WF_SOURCE_DIR));
if (generatedSandboxName!=null) {
System.getProperties().setProperty(PROPERTY_NAME_SANDBOX_NAME,
generatedSandboxName);
} else {
System.out.println("unable to generate sandbox name from path: " +
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName =
generateSandboxName(
System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
if (generatedSandboxName != null) {
System.getProperties()
.setProperty(PROPERTY_NAME_SANDBOX_NAME, generatedSandboxName);
} else {
System.out.println(
"unable to generate sandbox name from path: "
+ System.getProperties().getProperty(PROPERTY_NAME_WF_SOURCE_DIR));
}
}
}
/**
* Generates sandbox name from workflow source directory.
*
* @param wfSourceDir
* @return generated sandbox name
*/
private String generateSandboxName(String wfSourceDir) {
// utilize all dir names until finding one of the limiters
// utilize all dir names until finding one of the limiters
List<String> sandboxNameParts = new ArrayList<String>();
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
ArrayUtils.reverse(tokens);
if (tokens.length>0) {
if (tokens.length > 0) {
for (String token : tokens) {
for (String limiter : limiters) {
if (limiter.equals(token)) {
return sandboxNameParts.size()>0?
StringUtils.join(sandboxNameParts.toArray()):null;
return sandboxNameParts.size() > 0
? StringUtils.join(sandboxNameParts.toArray())
: null;
}
}
if (sandboxNameParts.size()>0) {
if (sandboxNameParts.size() > 0) {
sandboxNameParts.add(0, File.separator);
}
sandboxNameParts.add(0, token);
@ -67,5 +70,4 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
return null;
}
}
}

View File

@ -1,19 +1,17 @@
/**
* Licensed under the Educational Community License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the License at
*
* Licensed under the Educational Community License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>http://www.opensource.org/licenses/ecl2.php
*
* http://www.opensource.org/licenses/ecl2.php
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* <p>Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.dnetlib.maven.plugin.properties;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
@ -26,7 +24,6 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
@ -38,13 +35,10 @@ import org.springframework.core.io.DefaultResourceLoader;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Writes project properties for the keys listed in specified properties files.
* Based on:
* Writes project properties for the keys listed in specified properties files. Based on:
* http://site.kuali.org/maven/plugins/properties-maven-plugin/1.3.2/write-project-properties-mojo.html
*
* @author mhorst
* @goal write-project-properties
*/
@ -56,9 +50,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
protected static final String PROPERTY_PREFIX_ENV = "env.";
private static final String ENCODING_UTF8 = "utf8";
/**
* @parameter property="properties.includePropertyKeysFromFiles"
*/
/** @parameter property="properties.includePropertyKeysFromFiles" */
private String[] includePropertyKeysFromFiles;
/**
@ -78,31 +70,33 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
protected File outputFile;
/**
* If true, the plugin will silently ignore any non-existent properties files, and the build will continue
* If true, the plugin will silently ignore any non-existent properties files, and the build
* will continue
*
* @parameter property="properties.quiet" default-value="true"
*/
private boolean quiet;
/**
* Comma separated list of characters to escape when writing property values. cr=carriage return, lf=linefeed,
* tab=tab. Any other values are taken literally.
* Comma separated list of characters to escape when writing property values. cr=carriage
* return, lf=linefeed, tab=tab. Any other values are taken literally.
*
* @parameter default-value="cr,lf,tab" property="properties.escapeChars"
*/
private String escapeChars;
/**
* If true, the plugin will include system properties when writing the properties file. System properties override
* both environment variables and project properties.
* If true, the plugin will include system properties when writing the properties file. System
* properties override both environment variables and project properties.
*
* @parameter default-value="false" property="properties.includeSystemProperties"
*/
private boolean includeSystemProperties;
/**
* If true, the plugin will include environment variables when writing the properties file. Environment variables
* are prefixed with "env". Environment variables override project properties.
* If true, the plugin will include environment variables when writing the properties file.
* Environment variables are prefixed with "env". Environment variables override project
* properties.
*
* @parameter default-value="false" property="properties.includeEnvironmentVariables"
*/
@ -116,8 +110,8 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
private String exclude;
/**
* Comma separated set of properties to write to the properties file. If provided, only the properties matching
* those supplied here will be written to the properties file.
* Comma separated set of properties to write to the properties file. If provided, only the
* properties matching those supplied here will be written to the properties file.
*
* @parameter property="properties.include"
*/
@ -127,7 +121,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* @see org.apache.maven.plugin.AbstractMojo#execute()
*/
@Override
@SuppressFBWarnings({"NP_UNWRITTEN_FIELD","UWF_UNWRITTEN_FIELD"})
@SuppressFBWarnings({"NP_UNWRITTEN_FIELD", "UWF_UNWRITTEN_FIELD"})
public void execute() throws MojoExecutionException, MojoFailureException {
Properties properties = new Properties();
// Add project properties
@ -153,6 +147,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides environment variables.
*
* @return environment variables
*/
protected static Properties getEnvironmentVariables() {
@ -165,20 +160,22 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Removes properties which should not be written.
*
* @param properties
* @param omitCSV
* @param includeCSV
* @throws MojoExecutionException
*/
protected void trim(Properties properties, String omitCSV, String includeCSV) throws MojoExecutionException {
protected void trim(Properties properties, String omitCSV, String includeCSV)
throws MojoExecutionException {
List<String> omitKeys = getListFromCSV(omitCSV);
for (String key : omitKeys) {
properties.remove(key);
}
List<String> includeKeys = getListFromCSV(includeCSV);
// mh: including keys from predefined properties
if (includePropertyKeysFromFiles!=null && includePropertyKeysFromFiles.length>0) {
// mh: including keys from predefined properties
if (includePropertyKeysFromFiles != null && includePropertyKeysFromFiles.length > 0) {
for (String currentIncludeLoc : includePropertyKeysFromFiles) {
if (validate(currentIncludeLoc)) {
Properties p = getProperties(currentIncludeLoc);
@ -188,8 +185,8 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
}
}
}
if (includeKeys!=null && !includeKeys.isEmpty()) {
// removing only when include keys provided
if (includeKeys != null && !includeKeys.isEmpty()) {
// removing only when include keys provided
Set<String> keys = properties.stringPropertyNames();
for (String key : keys) {
if (!includeKeys.contains(key)) {
@ -201,6 +198,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Checks whether file exists.
*
* @param location
* @return true when exists, false otherwise.
*/
@ -219,6 +217,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Validates resource location.
*
* @param location
* @return true when valid, false otherwise
* @throws MojoExecutionException
@ -238,6 +237,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides input stream.
*
* @param location
* @return input stream
* @throws IOException
@ -254,6 +254,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Creates properties for given location.
*
* @param location
* @return properties for given location
* @throws MojoExecutionException
@ -278,6 +279,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides escape characters.
*
* @param escapeChars
* @return escape characters
*/
@ -293,6 +295,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides real token.
*
* @param token
* @return real token
*/
@ -310,6 +313,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Returns content.
*
* @param comment
* @param properties
* @param escapeTokens
@ -332,13 +336,15 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Writes properties to given file.
*
* @param file
* @param comment
* @param properties
* @param escapeTokens
* @throws MojoExecutionException
*/
protected void writeProperties(File file, String comment, Properties properties, List<String> escapeTokens)
protected void writeProperties(
File file, String comment, Properties properties, List<String> escapeTokens)
throws MojoExecutionException {
try {
String content = getContent(comment, properties, escapeTokens);
@ -350,6 +356,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Escapes characters.
*
* @param s
* @param escapeChars
* @return
@ -364,6 +371,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Provides replacement token.
*
* @param escapeChar
* @return replacement token
*/
@ -381,6 +389,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Returns list from csv.
*
* @param csv
* @return list of values generated from CSV
*/
@ -422,15 +431,14 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
/**
* Sets property files for which keys properties should be included.
*
* @param includePropertyKeysFromFiles
*/
public void setIncludePropertyKeysFromFiles(
String[] includePropertyKeysFromFiles) {
if (includePropertyKeysFromFiles!=null) {
this.includePropertyKeysFromFiles = Arrays.copyOf(
includePropertyKeysFromFiles,
includePropertyKeysFromFiles.length);
public void setIncludePropertyKeysFromFiles(String[] includePropertyKeysFromFiles) {
if (includePropertyKeysFromFiles != null) {
this.includePropertyKeysFromFiles =
Arrays.copyOf(
includePropertyKeysFromFiles, includePropertyKeysFromFiles.length);
}
}
}

View File

@ -1,16 +1,13 @@
package eu.dnetlib.maven.plugin.properties;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_SANDBOX_NAME;
import static eu.dnetlib.maven.plugin.properties.GenerateOoziePropertiesMojo.PROPERTY_NAME_WF_SOURCE_DIR;
import static org.junit.jupiter.api.Assertions.*;
/**
* @author mhorst, claudio.atzori
*
*/
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest {
private GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@ -96,5 +93,4 @@ public class GenerateOoziePropertiesMojoTest {
// assert
assertEquals("wf/transformers", System.getProperty(PROPERTY_NAME_SANDBOX_NAME));
}
}

View File

@ -1,5 +1,12 @@
package eu.dnetlib.maven.plugin.properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.lenient;
import java.io.*;
import java.util.Properties;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.project.MavenProject;
import org.junit.jupiter.api.*;
@ -9,23 +16,11 @@ import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.mockito.junit.jupiter.MockitoExtension;
import java.io.*;
import java.util.Properties;
import static eu.dnetlib.maven.plugin.properties.WritePredefinedProjectProperties.PROPERTY_PREFIX_ENV;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.lenient;
/**
* @author mhorst, claudio.atzori
*
*/
/** @author mhorst, claudio.atzori */
@ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest {
@Mock
private MavenProject mavenProject;
@Mock private MavenProject mavenProject;
private WritePredefinedProjectProperties mojo;
@ -86,7 +81,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -110,7 +106,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -134,7 +131,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -150,7 +148,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.store(new FileWriter(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute
mojo.execute();
@ -164,7 +163,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) throws Exception {
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -175,7 +175,8 @@ public class WritePredefinedProjectPropertiesTest {
projectProperties.setProperty(includedKey, includedValue);
doReturn(projectProperties).when(mavenProject).getProperties();
mojo.setIncludePropertyKeysFromFiles(new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
mojo.setIncludePropertyKeysFromFiles(
new String[] {"/eu/dnetlib/maven/plugin/properties/included.properties"});
// execute
mojo.execute();
@ -207,7 +208,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) throws Exception {
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -223,7 +225,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.storeToXML(new FileOutputStream(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute
mojo.execute();
@ -237,7 +240,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) throws Exception {
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
throws Exception {
// given
String key = "projectPropertyKey";
String value = "projectPropertyValue";
@ -253,7 +257,8 @@ public class WritePredefinedProjectPropertiesTest {
includedProperties.setProperty(includedKey, "irrelevantValue");
includedProperties.store(new FileOutputStream(includedPropertiesFile), null);
mojo.setIncludePropertyKeysFromFiles(new String[] {includedPropertiesFile.getAbsolutePath()});
mojo.setIncludePropertyKeysFromFiles(
new String[] {includedPropertiesFile.getAbsolutePath()});
// execute
Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute());
@ -296,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
Properties storedProperties = getStoredProperties(testFolder);
assertTrue(storedProperties.size() > 0);
for (Object currentKey : storedProperties.keySet()) {
assertTrue(((String)currentKey).startsWith(PROPERTY_PREFIX_ENV));
assertTrue(((String) currentKey).startsWith(PROPERTY_PREFIX_ENV));
}
}
@ -320,7 +325,8 @@ public class WritePredefinedProjectPropertiesTest {
}
@Test
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) throws Exception {
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
throws Exception {
// given
String key = "systemPropertyKey ";
String value = "systemPropertyValue";
@ -347,7 +353,8 @@ public class WritePredefinedProjectPropertiesTest {
return new File(testFolder, "test.properties");
}
private Properties getStoredProperties(File testFolder) throws FileNotFoundException, IOException {
private Properties getStoredProperties(File testFolder)
throws FileNotFoundException, IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(getPropertiesFileLocation(testFolder)));
return properties;

View File

@ -3,7 +3,6 @@ package eu.dnetlib.collector.worker.model;
import java.util.HashMap;
import java.util.Map;
public class ApiDescriptor {
private String id;
@ -45,5 +44,4 @@ public class ApiDescriptor {
public void setProtocol(final String protocol) {
this.protocol = protocol;
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.UUID;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -12,9 +11,7 @@ import javax.persistence.Table;
@Table(name = "mdstores")
public class MDStore implements Serializable {
/**
*
*/
/** */
private static final long serialVersionUID = 3160530489149700055L;
@Id
@ -95,11 +92,13 @@ public class MDStore implements Serializable {
this.apiId = apiId;
}
public static MDStore newInstance(final String format, final String layout, final String interpretation) {
public static MDStore newInstance(
final String format, final String layout, final String interpretation) {
return newInstance(format, layout, interpretation, null, null, null);
}
public static MDStore newInstance(final String format,
public static MDStore newInstance(
final String format,
final String layout,
final String interpretation,
final String dsName,
@ -115,5 +114,4 @@ public class MDStore implements Serializable {
md.setApiId(apiId);
return md;
}
}

View File

@ -1,7 +1,6 @@
package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -11,9 +10,7 @@ import javax.persistence.Table;
@Table(name = "mdstore_current_versions")
public class MDStoreCurrentVersion implements Serializable {
/**
*
*/
/** */
private static final long serialVersionUID = -4757725888593745773L;
@Id

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -14,9 +13,7 @@ import javax.persistence.TemporalType;
@Table(name = "mdstore_versions")
public class MDStoreVersion implements Serializable {
/**
*
*/
/** */
private static final long serialVersionUID = -4763494442274298339L;
@Id
@ -97,5 +94,4 @@ public class MDStoreVersion implements Serializable {
public void setSize(final long size) {
this.size = size;
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.data.mdstore.manager.common.model;
import java.io.Serializable;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
@ -14,9 +13,7 @@ import javax.persistence.TemporalType;
@Table(name = "mdstores_with_info")
public class MDStoreWithInfo implements Serializable {
/**
*
*/
/** */
private static final long serialVersionUID = -8445784770687571492L;
@Id
@ -141,5 +138,4 @@ public class MDStoreWithInfo implements Serializable {
public void setNumberOfVersions(final long numberOfVersions) {
this.numberOfVersions = numberOfVersions;
}
}

View File

@ -1,10 +1,6 @@
package eu.dnetlib.dhp.application;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Serializable;
@ -12,7 +8,9 @@ import java.io.StringWriter;
import java.util.*;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import java.util.zip.Inflater;
import org.apache.commons.cli.*;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
public class ArgumentApplicationParser implements Serializable {
@ -23,7 +21,8 @@ public class ArgumentApplicationParser implements Serializable {
public ArgumentApplicationParser(final String json_configuration) throws Exception {
final ObjectMapper mapper = new ObjectMapper();
final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
final OptionsParameter[] configuration =
mapper.readValue(json_configuration, OptionsParameter[].class);
createOptionMap(configuration);
}
@ -33,23 +32,26 @@ public class ArgumentApplicationParser implements Serializable {
private void createOptionMap(final OptionsParameter[] configuration) {
Arrays.stream(configuration).map(conf -> {
final Option o = new Option(conf.getParamName(), true, conf.getParamDescription());
Arrays.stream(configuration)
.map(
conf -> {
final Option o =
new Option(
conf.getParamName(), true, conf.getParamDescription());
o.setLongOpt(conf.getParamLongName());
o.setRequired(conf.isParamRequired());
if (conf.isCompressed()) {
compressedValues.add(conf.getParamLongName());
}
return o;
}).forEach(options::addOption);
// HelpFormatter formatter = new HelpFormatter();
// formatter.printHelp("myapp", null, options, null, true);
})
.forEach(options::addOption);
// HelpFormatter formatter = new HelpFormatter();
// formatter.printHelp("myapp", null, options, null, true);
}
public static String decompressValue(final String abstractCompressed) {
try {
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
@ -63,7 +65,7 @@ public class ArgumentApplicationParser implements Serializable {
}
}
public static String compressArgument(final String value) throws Exception{
public static String compressArgument(final String value) throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(value.getBytes());
@ -74,7 +76,14 @@ public class ArgumentApplicationParser implements Serializable {
public void parseArgument(final String[] args) throws Exception {
CommandLineParser parser = new BasicParser();
CommandLine cmd = parser.parse(options, args);
Arrays.stream(cmd.getOptions()).forEach(it -> objectMap.put(it.getLongOpt(), compressedValues.contains(it.getLongOpt())? decompressValue(it.getValue()): it.getValue()));
Arrays.stream(cmd.getOptions())
.forEach(
it ->
objectMap.put(
it.getLongOpt(),
compressedValues.contains(it.getLongOpt())
? decompressValue(it.getValue())
: it.getValue()));
}
public String get(final String key) {

View File

@ -1,6 +1,5 @@
package eu.dnetlib.dhp.application;
public class OptionsParameter {
private String paramName;
@ -9,8 +8,7 @@ public class OptionsParameter {
private boolean paramRequired;
private boolean compressed;
public OptionsParameter() {
}
public OptionsParameter() {}
public String getParamName() {
return paramName;

View File

@ -3,23 +3,19 @@ package eu.dnetlib.dhp.common;
import java.io.Serializable;
import java.util.function.Supplier;
/**
* Provides serializable and throwing extensions to standard functional interfaces.
*/
/** Provides serializable and throwing extensions to standard functional interfaces. */
public class FunctionalInterfaceSupport {
private FunctionalInterfaceSupport() {
}
private FunctionalInterfaceSupport() {}
/**
* Serializable supplier of any kind of objects. To be used withing spark processing pipelines when supplying
* functions externally.
* Serializable supplier of any kind of objects. To be used withing spark processing pipelines
* when supplying functions externally.
*
* @param <T>
*/
@FunctionalInterface
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {
}
public interface SerializableSupplier<T> extends Supplier<T>, Serializable {}
/**
* Extension of consumer accepting functions throwing an exception.
@ -52,5 +48,4 @@ public class FunctionalInterfaceSupport {
public interface ThrowingRunnable<E extends Exception> {
void run() throws E;
}
}

View File

@ -1,5 +1,10 @@
package eu.dnetlib.dhp.common;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@ -7,19 +12,26 @@ import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
/**
* HDFS utility methods.
*/
/** HDFS utility methods. */
public class HdfsSupport {
private static final Logger logger = LoggerFactory.getLogger(HdfsSupport.class);
private HdfsSupport() {
private HdfsSupport() {}
/**
* Checks a path (file or dir) exists on HDFS.
*
* @param path Path to be checked
* @param configuration Configuration of hadoop env
*/
public static boolean exists(String path, Configuration configuration) {
logger.info("Removing path: {}", path);
return rethrowAsRuntimeException(
() -> {
Path f = new Path(path);
FileSystem fileSystem = FileSystem.get(configuration);
return fileSystem.exists(f);
});
}
/**
@ -30,7 +42,8 @@ public class HdfsSupport {
*/
public static void remove(String path, Configuration configuration) {
logger.info("Removing path: {}", path);
rethrowAsRuntimeException(() -> {
rethrowAsRuntimeException(
() -> {
Path f = new Path(path);
FileSystem fileSystem = FileSystem.get(configuration);
if (fileSystem.exists(f)) {
@ -48,8 +61,9 @@ public class HdfsSupport {
*/
public static List<String> listFiles(String path, Configuration configuration) {
logger.info("Listing files in path: {}", path);
return rethrowAsRuntimeException(() -> Arrays
.stream(FileSystem.get(configuration).listStatus(new Path(path)))
return rethrowAsRuntimeException(
() ->
Arrays.stream(FileSystem.get(configuration).listStatus(new Path(path)))
.filter(FileStatus::isDirectory)
.map(x -> x.getPath().toString())
.collect(Collectors.toList()));

View File

@ -1,58 +1,68 @@
package eu.dnetlib.dhp.common;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
import java.util.Objects;
import java.util.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import java.util.Objects;
import java.util.function.Function;
/**
* SparkSession utility methods.
*/
/** SparkSession utility methods. */
public class SparkSessionSupport {
private SparkSessionSupport() {
}
private SparkSessionSupport() {}
/**
* Runs a given function using SparkSession created using default builder and supplied SparkConf. Stops SparkSession
* when SparkSession is managed. Allows to reuse SparkSession created externally.
* Runs a given function using SparkSession created using default builder and supplied
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
* created externally.
*
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkSession(SparkConf conf,
public static void runWithSparkSession(
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
runWithSparkSession(c -> SparkSession.builder().config(c).getOrCreate(), conf, isSparkSessionManaged, fn);
runWithSparkSession(
c -> SparkSession.builder().config(c).getOrCreate(),
conf,
isSparkSessionManaged,
fn);
}
/**
* Runs a given function using SparkSession created with hive support and using default builder and supplied SparkConf.
* Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession created externally.
* Runs a given function using SparkSession created with hive support and using default builder
* and supplied SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse
* SparkSession created externally.
*
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkHiveSession(SparkConf conf,
public static void runWithSparkHiveSession(
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {
runWithSparkSession(c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(), conf, isSparkSessionManaged, fn);
runWithSparkSession(
c -> SparkSession.builder().config(c).enableHiveSupport().getOrCreate(),
conf,
isSparkSessionManaged,
fn);
}
/**
* Runs a given function using SparkSession created using supplied builder and supplied SparkConf. Stops SparkSession
* when SparkSession is managed. Allows to reuse SparkSession created externally.
* Runs a given function using SparkSession created using supplied builder and supplied
* SparkConf. Stops SparkSession when SparkSession is managed. Allows to reuse SparkSession
* created externally.
*
* @param sparkSessionBuilder Builder of SparkSession
* @param conf SparkConf instance
* @param isSparkSessionManaged When true will stop SparkSession
* @param fn Consumer to be applied to constructed SparkSession
*/
public static void runWithSparkSession(Function<SparkConf, SparkSession> sparkSessionBuilder,
public static void runWithSparkSession(
Function<SparkConf, SparkSession> sparkSessionBuilder,
SparkConf conf,
Boolean isSparkSessionManaged,
ThrowingConsumer<SparkSession, Exception> fn) {

View File

@ -3,13 +3,10 @@ package eu.dnetlib.dhp.common;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingRunnable;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingSupplier;
/**
* Exception handling utility methods.
*/
/** Exception handling utility methods. */
public class ThrowingSupport {
private ThrowingSupport() {
}
private ThrowingSupport() {}
/**
* Executes given runnable and rethrows any exceptions as RuntimeException.
@ -32,7 +29,8 @@ public class ThrowingSupport {
* @param msg Message to be set for rethrown exception
* @param <E> Type of exception thrown
*/
public static <E extends Exception> void rethrowAsRuntimeException(ThrowingRunnable<E> fn, String msg) {
public static <E extends Exception> void rethrowAsRuntimeException(
ThrowingRunnable<E> fn, String msg) {
try {
fn.run();
} catch (Exception e) {
@ -65,12 +63,12 @@ public class ThrowingSupport {
* @param <E> Type of exception thrown
* @return Result of supplier execution
*/
public static <T, E extends Exception> T rethrowAsRuntimeException(ThrowingSupplier<T, E> fn, String msg) {
public static <T, E extends Exception> T rethrowAsRuntimeException(
ThrowingSupplier<T, E> fn, String msg) {
try {
return fn.get();
} catch (Exception e) {
throw new RuntimeException(msg, e);
}
}
}

View File

@ -1,66 +1,52 @@
package eu.dnetlib.dhp.model.mdstore;
import eu.dnetlib.dhp.utils.DHPUtils;
import java.io.Serializable;
/**
* This class models a record inside the new Metadata store collection on HDFS *
*
*/
/** This class models a record inside the new Metadata store collection on HDFS * */
public class MetadataRecord implements Serializable {
/**
* The D-Net Identifier associated to the record
*/
/** The D-Net Identifier associated to the record */
private String id;
/**
* The original Identifier of the record
*/
/** The original Identifier of the record */
private String originalId;
/**
* The encoding of the record, should be JSON or XML
*/
/** The encoding of the record, should be JSON or XML */
private String encoding;
/**
* The information about the provenance of the record see @{@link Provenance}
* for the model of this information
* The information about the provenance of the record see @{@link Provenance} for the model of
* this information
*/
private Provenance provenance;
/**
* The content of the metadata
*/
/** The content of the metadata */
private String body;
/**
* the date when the record has been stored
*/
/** the date when the record has been stored */
private long dateOfCollection;
/**
* the date when the record has been stored
*/
/** the date when the record has been stored */
private long dateOfTransformation;
public MetadataRecord() {
this.dateOfCollection = System.currentTimeMillis();
}
public MetadataRecord(String originalId, String encoding, Provenance provenance, String body, long dateOfCollection) {
public MetadataRecord(
String originalId,
String encoding,
Provenance provenance,
String body,
long dateOfCollection) {
this.originalId = originalId;
this.encoding = encoding;
this.provenance = provenance;
this.body = body;
this.dateOfCollection = dateOfCollection;
this.id = DHPUtils.generateIdentifier(originalId,this.provenance.getNsPrefix());
this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
}
public String getId() {
@ -71,7 +57,6 @@ public class MetadataRecord implements Serializable {
this.id = id;
}
public String getOriginalId() {
return originalId;
}
@ -96,7 +81,6 @@ public class MetadataRecord implements Serializable {
this.provenance = provenance;
}
public String getBody() {
return body;
}
@ -127,7 +111,6 @@ public class MetadataRecord implements Serializable {
return false;
}
return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
}
@Override

View File

@ -2,27 +2,20 @@ package eu.dnetlib.dhp.model.mdstore;
import java.io.Serializable;
/**
* @author Sandro La Bruzzo
*
* Provenace class models the provenance of the record in the metadataStore
* It contains the identifier and the name of the datasource that gives the
* record
*
* <p>Provenace class models the provenance of the record in the metadataStore It contains the
* identifier and the name of the datasource that gives the record
*/
public class Provenance implements Serializable {
private String datasourceId;
private String datasourceName;
private String nsPrefix;
public Provenance() {
}
public Provenance() {}
public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
this.datasourceId = datasourceId;

View File

@ -1,20 +1,17 @@
package eu.dnetlib.dhp.parser.utility;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDNav;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDNav;
/**
* Created by sandro on 9/29/16.
*/
/** Created by sandro on 9/29/16. */
public class VtdUtilityParser {
public static List<Node> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
public static List<Node> getTextValuesWithAttributes(
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
throws VtdException {
final List<Node> results = new ArrayList<>();
try {
@ -35,11 +32,13 @@ public class VtdUtilityParser {
}
}
private static Map<String, String> getAttributes(final VTDNav vn, final List<String> attributes) {
private static Map<String, String> getAttributes(
final VTDNav vn, final List<String> attributes) {
final Map<String, String> currentAttributes = new HashMap<>();
if (attributes != null) {
attributes.forEach(attributeKey -> {
attributes.forEach(
attributeKey -> {
try {
int attr = vn.getAttrVal(attributeKey);
if (attr > -1) {
@ -53,7 +52,8 @@ public class VtdUtilityParser {
return currentAttributes;
}
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws VtdException {
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath)
throws VtdException {
List<String> results = new ArrayList<>();
try {
ap.selectXPath(xpath);
@ -67,13 +67,13 @@ public class VtdUtilityParser {
}
}
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws VtdException {
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath)
throws VtdException {
try {
ap.selectXPath(xpath);
while (ap.evalXPath() != -1) {
int it = nav.getText();
if (it > -1)
return nav.toNormalizedString(it);
if (it > -1) return nav.toNormalizedString(it);
}
return null;
} catch (Exception e) {
@ -103,5 +103,4 @@ public class VtdUtilityParser {
this.attributes = attributes;
}
}
}

View File

@ -1,18 +1,16 @@
package eu.dnetlib.dhp.utils;
import com.jayway.jsonpath.JsonPath;
import net.minidev.json.JSONArray;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang3.StringUtils;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import net.minidev.json.JSONArray;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.codec.binary.Hex;
public class DHPUtils {
@ -28,41 +26,40 @@ public class DHPUtils {
}
public static String generateIdentifier(final String originalId, final String nsPrefix) {
return String.format("%s::%s",nsPrefix, DHPUtils.md5(originalId));
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
}
public static String compressString(final String input ) {
try ( ByteArrayOutputStream out = new ByteArrayOutputStream(); Base64OutputStream b64os = new Base64OutputStream(out)) {
public static String compressString(final String input) {
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
Base64OutputStream b64os = new Base64OutputStream(out)) {
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
gzip.write(input.getBytes(StandardCharsets.UTF_8));
gzip.close();
return out.toString();
} catch (Throwable e ) {
} catch (Throwable e) {
return null;
}
}
public static String decompressString(final String input) {
byte[] byteArray = Base64.decodeBase64(input.getBytes());
int len;
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray))); ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
byte[] buffer = new byte[1024];
while((len = gis.read(buffer)) != -1){
while ((len = gis.read(buffer)) != -1) {
bos.write(buffer, 0, len);
}
return bos.toString();
} catch (Exception e) {
return null;
}
}
public static String getJPathString(final String jsonPath, final String json) {
try {
Object o = JsonPath.read(json, jsonPath);
if (o instanceof String)
return (String) o;
if (o instanceof String) return (String) o;
if (o instanceof JSONArray && ((JSONArray) o).size() > 0)
return (String) ((JSONArray) o).get(0);
return o.toString();
@ -70,5 +67,4 @@ public class DHPUtils {
return "";
}
}
}

View File

@ -9,10 +9,13 @@ import net.sf.saxon.trans.XPathException;
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
public static String DEFAULT_SAXON_EXT_NS_URI =
"http://www.d-net.research-infrastructures.eu/saxon-extension";
public abstract String getName();
public abstract Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException;
public abstract Sequence doCall(XPathContext context, Sequence[] arguments)
throws XPathException;
@Override
public StructuredQName getFunctionQName() {
@ -28,5 +31,4 @@ public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinit
}
};
}
}

View File

@ -1,5 +1,9 @@
package eu.dnetlib.dhp.utils.saxon;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.Sequence;
@ -7,14 +11,9 @@ import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
public class ExtractYear extends AbstractExtensionFunction {
private static final String[] dateFormats = { "yyyy-MM-dd", "yyyy/MM/dd" };
private static final String[] dateFormats = {"yyyy-MM-dd", "yyyy/MM/dd"};
@Override
public String getName() {
@ -45,7 +44,7 @@ public class ExtractYear extends AbstractExtensionFunction {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
}
@Override
@ -60,7 +59,8 @@ public class ExtractYear extends AbstractExtensionFunction {
c.setTime(new SimpleDateFormat(format).parse(s));
String year = String.valueOf(c.get(Calendar.YEAR));
return year;
} catch (ParseException e) {}
} catch (ParseException e) {
}
}
return "";
}

View File

@ -1,18 +1,19 @@
package eu.dnetlib.dhp.utils.saxon;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.om.Sequence;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
public class NormalizeDate extends AbstractExtensionFunction {
private static final String[] normalizeDateFormats = { "yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy" };
private static final String[] normalizeDateFormats = {
"yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "yyyy/MM/dd", "yyyy"
};
private static final String normalizeOutFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'");
@ -42,7 +43,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
}
@Override
@ -58,9 +59,9 @@ public class NormalizeDate extends AbstractExtensionFunction {
Date parse = new SimpleDateFormat(format).parse(date);
String res = new SimpleDateFormat(normalizeOutFormat).format(parse);
return res;
} catch (ParseException e) {}
} catch (ParseException e) {
}
}
return "";
}
}

View File

@ -24,7 +24,8 @@ public class PickFirst extends AbstractExtensionFunction {
final String s1 = getValue(arguments[0]);
final String s2 = getValue(arguments[1]);
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
return new StringValue(
StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
}
private String getValue(final Sequence arg) throws XPathException {
@ -49,12 +50,11 @@ public class PickFirst extends AbstractExtensionFunction {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[] { SequenceType.OPTIONAL_ITEM };
return new SequenceType[] {SequenceType.OPTIONAL_ITEM};
}
@Override
public SequenceType getResultType(SequenceType[] suppliedArgumentTypes) {
return SequenceType.SINGLE_STRING;
}
}

View File

@ -1,17 +1,17 @@
package eu.dnetlib.dhp.utils.saxon;
import net.sf.saxon.Configuration;
import net.sf.saxon.TransformerFactoryImpl;
import java.io.StringReader;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamSource;
import java.io.StringReader;
import net.sf.saxon.Configuration;
import net.sf.saxon.TransformerFactoryImpl;
public class SaxonTransformerFactory {
/**
* Creates the index record transformer from the given XSLT
*
* @param xslt
* @return
* @throws TransformerException
@ -26,5 +26,4 @@ public class SaxonTransformerFactory {
return factory.newTransformer(new StreamSource(new StringReader(xslt)));
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.message;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.Map;
@ -16,20 +15,12 @@ public class Message {
private Map<String, String> body;
public static Message fromJson(final String json) throws IOException {
final ObjectMapper jsonMapper = new ObjectMapper();
return jsonMapper.readValue(json, Message.class);
}
public Message() {
}
public Message() {}
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
this.workflowId = workflowId;

View File

@ -4,7 +4,6 @@ import com.rabbitmq.client.AMQP;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.DefaultConsumer;
import com.rabbitmq.client.Envelope;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.LinkedBlockingQueue;
@ -13,7 +12,6 @@ public class MessageConsumer extends DefaultConsumer {
final LinkedBlockingQueue<Message> queueMessages;
/**
* Constructs a new instance and records its association to the passed-in channel.
*
@ -25,19 +23,20 @@ public class MessageConsumer extends DefaultConsumer {
this.queueMessages = queueMessages;
}
@Override
public void handleDelivery(String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body) throws IOException {
public void handleDelivery(
String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
throws IOException {
final String json = new String(body, StandardCharsets.UTF_8);
Message message = Message.fromJson(json);
try {
this.queueMessages.put(message);
System.out.println("Receiving Message "+message);
System.out.println("Receiving Message " + message);
} catch (InterruptedException e) {
if (message.getType()== MessageType.REPORT)
if (message.getType() == MessageType.REPORT)
throw new RuntimeException("Error on sending message");
else {
//TODO LOGGING EXCEPTION
// TODO LOGGING EXCEPTION
}
} finally {
getChannel().basicAck(envelope.getDeliveryTag(), false);

View File

@ -3,8 +3,6 @@ package eu.dnetlib.message;
import com.rabbitmq.client.Channel;
import com.rabbitmq.client.Connection;
import com.rabbitmq.client.ConnectionFactory;
import sun.rmi.runtime.Log;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@ -21,23 +19,32 @@ public class MessageManager {
private Connection connection;
private Map<String , Channel> channels = new HashMap<>();
private Map<String, Channel> channels = new HashMap<>();
private boolean durable;
private boolean autodelete;
final private LinkedBlockingQueue<Message> queueMessages;
private final LinkedBlockingQueue<Message> queueMessages;
public MessageManager(String messageHost, String username, String password, final LinkedBlockingQueue<Message> queueMessages) {
public MessageManager(
String messageHost,
String username,
String password,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages;
this.messageHost = messageHost;
this.username = username;
this.password = password;
}
public MessageManager(String messageHost, String username, String password, boolean durable, boolean autodelete, final LinkedBlockingQueue<Message> queueMessages) {
public MessageManager(
String messageHost,
String username,
String password,
boolean durable,
boolean autodelete,
final LinkedBlockingQueue<Message> queueMessages) {
this.queueMessages = queueMessages;
this.messageHost = messageHost;
this.username = username;
@ -55,7 +62,12 @@ public class MessageManager {
return factory.newConnection();
}
private Channel createChannel(final Connection connection, final String queueName, final boolean durable, final boolean autodelete ) throws Exception {
private Channel createChannel(
final Connection connection,
final String queueName,
final boolean durable,
final boolean autodelete)
throws Exception {
Map<String, Object> args = new HashMap<>();
args.put("x-message-ttl", 10000);
Channel channel = connection.createChannel();
@ -63,7 +75,8 @@ public class MessageManager {
return channel;
}
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete) throws Exception {
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
throws Exception {
if (channels.containsKey(queueName)) {
return channels.get(queueName);
}
@ -75,14 +88,14 @@ public class MessageManager {
return channels.get(queueName);
}
public void close() throws IOException {
channels.values().forEach(ch-> {
channels.values()
.forEach(
ch -> {
try {
ch.close();
} catch (Exception e) {
//TODO LOG
// TODO LOG
}
});
@ -92,26 +105,30 @@ public class MessageManager {
public boolean sendMessage(final Message message, String queueName) throws Exception {
try {
Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete);
channel.basicPublish("", queueName,null, message.toString().getBytes());
channel.basicPublish("", queueName, null, message.toString().getBytes());
return true;
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
public boolean sendMessage(final Message message, String queueName, boolean durable_var, boolean autodelete_var) throws Exception {
public boolean sendMessage(
final Message message, String queueName, boolean durable_var, boolean autodelete_var)
throws Exception {
try {
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
channel.basicPublish("", queueName,null, message.toString().getBytes());
channel.basicPublish("", queueName, null, message.toString().getBytes());
return true;
} catch (Throwable e) {
throw new RuntimeException(e);
}
}
public void startConsumingMessage(final String queueName, final boolean durable, final boolean autodelete) throws Exception{
public void startConsumingMessage(
final String queueName, final boolean durable, final boolean autodelete)
throws Exception {
Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
channel.basicConsume(queueName, false, new MessageConsumer(channel,queueMessages));
channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
}
}

View File

@ -1,8 +1,6 @@
package eu.dnetlib.message;
public enum MessageType {
ONGOING,
REPORT
}

View File

@ -1,19 +1,18 @@
package eu.dnetlib.scholexplorer.relation;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import java.io.Serializable;
import java.util.HashMap;
import org.apache.commons.io.IOUtils;
public class RelationMapper extends HashMap<String,RelInfo > implements Serializable {
public class RelationMapper extends HashMap<String, RelInfo> implements Serializable {
public static RelationMapper load() throws Exception {
final String json = IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
final String json =
IOUtils.toString(RelationMapper.class.getResourceAsStream("relations.json"));
ObjectMapper mapper = new ObjectMapper();
return mapper.readValue(json, RelationMapper.class);
}
}

View File

@ -1,29 +1,45 @@
package eu.dnetlib.dhp.application;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;
public class ArgumentApplicationParserTest {
@Test
public void testParseParameter() throws Exception {
final String jsonConfiguration = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
final String jsonConfiguration =
IOUtils.toString(
this.getClass()
.getResourceAsStream("/eu/dnetlib/application/parameters.json"));
assertNotNull(jsonConfiguration);
ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(new String[]{"-p", "value0",
"-a", "value1",
"-n", "value2",
"-u", "value3",
"-ru", "value4",
"-rp", "value5",
"-rh", "value6",
"-ro", "value7",
"-rr", "value8",
"-w", "value9",
"-cc", ArgumentApplicationParser.compressArgument(jsonConfiguration)
parser.parseArgument(
new String[] {
"-p",
"value0",
"-a",
"value1",
"-n",
"value2",
"-u",
"value3",
"-ru",
"value4",
"-rp",
"value5",
"-rh",
"value6",
"-ro",
"value7",
"-rr",
"value8",
"-w",
"value9",
"-cc",
ArgumentApplicationParser.compressArgument(jsonConfiguration)
});
assertNotNull(parser.get("hdfsPath"));
assertNotNull(parser.get("apidescriptor"));
@ -47,10 +63,4 @@ public class ArgumentApplicationParserTest {
assertEquals("value9", parser.get("workflowId"));
assertEquals(jsonConfiguration, parser.get("ccCoco"));
}
}

View File

@ -1,9 +1,6 @@
package eu.dnetlib.dhp.common;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.nio.file.Files;
@ -11,8 +8,10 @@ import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.*;
import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
public class HdfsSupportTest {
@ -22,8 +21,8 @@ public class HdfsSupportTest {
@Test
public void shouldThrowARuntimeExceptionOnError() {
// when
assertThrows(RuntimeException.class, () ->
HdfsSupport.remove(null, new Configuration()));
assertThrows(
RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
}
@Test
@ -54,8 +53,8 @@ public class HdfsSupportTest {
@Test
public void shouldThrowARuntimeExceptionOnError() {
// when
assertThrows(RuntimeException.class, () ->
HdfsSupport.listFiles(null, new Configuration()));
assertThrows(
RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
}
@Test
@ -68,8 +67,10 @@ public class HdfsSupportTest {
// then
assertEquals(2, paths.size());
List<String> expecteds = Arrays.stream(new String[]{subDir1.toString(), subDir2.toString()})
.sorted().collect(Collectors.toList());
List<String> expecteds =
Arrays.stream(new String[] {subDir1.toString(), subDir2.toString()})
.sorted()
.collect(Collectors.toList());
List<String> actuals = paths.stream().sorted().collect(Collectors.toList());
assertTrue(actuals.get(0).contains(expecteds.get(0)));
assertTrue(actuals.get(1).contains(expecteds.get(1)));

View File

@ -1,22 +1,22 @@
package eu.dnetlib.dhp.common;
import static org.mockito.Mockito.*;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.ThrowingConsumer;
import java.util.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.function.Function;
import static org.mockito.Mockito.*;
public class SparkSessionSupportTest {
@Nested
class RunWithSparkSession {
@Test
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() throws Exception {
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
throws Exception {
// given
SparkSession spark = mock(SparkSession.class);
SparkConf conf = mock(SparkConf.class);
@ -34,7 +34,8 @@ public class SparkSessionSupportTest {
}
@Test
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() throws Exception {
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
throws Exception {
// given
SparkSession spark = mock(SparkSession.class);
SparkConf conf = mock(SparkConf.class);

View File

@ -1,9 +1,9 @@
package eu.dnetlib.dhp.model.mdstore;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertTrue;
import org.junit.jupiter.api.Test;
public class MetadataRecordTest {
@Test

View File

@ -1,12 +1,11 @@
package eu.dnetlib.message;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test;
public class MessageTest {
@ -16,7 +15,7 @@ public class MessageTest {
m.setWorkflowId("wId");
m.setType(MessageType.ONGOING);
m.setJobName("Collection");
Map<String,String> body= new HashMap<>();
Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300");
body.put("ExecutionTime", "30s");
@ -28,28 +27,26 @@ public class MessageTest {
assertEquals(m1.getJobName(), m.getJobName());
assertNotNull(m1.getBody());
m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
m1.getBody()
.keySet()
.forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
assertEquals(m1.getJobName(), m.getJobName());
}
@Test
public void toStringTest() {
final String expectedJson= "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
final String expectedJson =
"{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
Message m = new Message();
m.setWorkflowId("wId");
m.setType(MessageType.ONGOING);
m.setJobName("Collection");
Map<String,String> body= new HashMap<>();
Map<String, String> body = new HashMap<>();
body.put("parsedItem", "300");
body.put("ExecutionTime", "30s");
m.setBody(body);
assertEquals(expectedJson,m.toString());
assertEquals(expectedJson, m.toString());
}
}

View File

@ -2,14 +2,12 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test;
public class RelationMapperTest {
@Test
public void testLoadRels() throws Exception{
public void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println);
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.dhp.schema.action;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.io.Serializable;
@JsonDeserialize(using = AtomicActionDeserializer.class)
@ -12,8 +11,7 @@ public class AtomicAction<T extends Oaf> implements Serializable {
private T payload;
public AtomicAction() {
}
public AtomicAction() {}
public AtomicAction(Class<T> clazz, T payload) {
this.clazz = clazz;

View File

@ -7,13 +7,13 @@ import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.io.IOException;
public class AtomicActionDeserializer extends JsonDeserializer {
@Override
public Object deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException, JsonProcessingException {
public Object deserialize(JsonParser jp, DeserializationContext ctxt)
throws IOException, JsonProcessingException {
JsonNode node = jp.getCodec().readTree(jp);
String classTag = node.get("clazz").asText();
JsonNode payload = node.get("payload");

View File

@ -2,15 +2,19 @@ package eu.dnetlib.dhp.schema.common;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
/**
* Actual entity types in the Graph
*/
/** Actual entity types in the Graph */
public enum EntityType {
publication, dataset, otherresearchproduct, software, datasource, organization, project;
publication,
dataset,
otherresearchproduct,
software,
datasource,
organization,
project;
/**
* Resolves the EntityType, given the relative class name
*
* @param clazz the given class name
* @param <T> actual OafEntity subclass
* @return the EntityType associated to the given class
@ -19,5 +23,4 @@ public enum EntityType {
return EntityType.valueOf(clazz.getSimpleName().toLowerCase());
}
}

View File

@ -1,9 +1,9 @@
package eu.dnetlib.dhp.schema.common;
/**
* Main entity types in the Graph
*/
/** Main entity types in the Graph */
public enum MainEntityType {
result, datasource, organization, project
result,
datasource,
organization,
project
}

View File

@ -2,17 +2,12 @@ package eu.dnetlib.dhp.schema.common;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Map;
/**
* Oaf model utility methods.
*/
/** Oaf model utility methods. */
public class ModelSupport {
/**
* Defines the mapping between the actual entity type and the main entity type
*/
/** Defines the mapping between the actual entity type and the main entity type */
private static Map<EntityType, MainEntityType> entityMapping = Maps.newHashMap();
static {
@ -26,9 +21,10 @@ public class ModelSupport {
}
/**
* Defines the mapping between the actual entity types and the relative classes implementing them
* Defines the mapping between the actual entity types and the relative classes implementing
* them
*/
public final static Map<EntityType, Class> entityTypes = Maps.newHashMap();
public static final Map<EntityType, Class> entityTypes = Maps.newHashMap();
static {
entityTypes.put(EntityType.datasource, Datasource.class);
@ -40,7 +36,7 @@ public class ModelSupport {
entityTypes.put(EntityType.publication, Publication.class);
}
public final static Map<String, Class> oafTypes = Maps.newHashMap();
public static final Map<String, Class> oafTypes = Maps.newHashMap();
static {
oafTypes.put("datasource", Datasource.class);
@ -55,8 +51,7 @@ public class ModelSupport {
private static final String schemeTemplate = "dnet:%s_%s_relations";
private ModelSupport() {
}
private ModelSupport() {}
/**
* Checks subclass-superclass relationship.
@ -67,7 +62,8 @@ public class ModelSupport {
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Y superClazzObject) {
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Y superClazzObject) {
return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
}
@ -80,7 +76,8 @@ public class ModelSupport {
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(X subClazzObject, Class<Y> superClazz) {
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
X subClazzObject, Class<Y> superClazz) {
return isSubClass(subClazzObject.getClass(), superClazz);
}
@ -93,7 +90,8 @@ public class ModelSupport {
* @param <Y> Superclass type
* @return True if X is a subclass of Y
*/
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(Class<X> subClazz, Class<Y> superClazz) {
public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
Class<X> subClazz, Class<Y> superClazz) {
return superClazz.isAssignableFrom(subClazz);
}
@ -104,7 +102,7 @@ public class ModelSupport {
* @return
*/
public static <T extends Oaf> Class<T>[] getOafModelClasses() {
return new Class[]{
return new Class[] {
Author.class,
Context.class,
Country.class,
@ -143,9 +141,9 @@ public class ModelSupport {
}
public static String getScheme(final String sourceType, final String targetType) {
return String.format(schemeTemplate,
return String.format(
schemeTemplate,
entityMapping.get(EntityType.valueOf(sourceType)).name(),
entityMapping.get(EntityType.valueOf(targetType)).name());
}
}

View File

@ -71,12 +71,12 @@ public class Author implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Author author = (Author) o;
return Objects.equals(fullname, author.fullname) &&
Objects.equals(name, author.name) &&
Objects.equals(surname, author.surname) &&
Objects.equals(rank, author.rank) &&
Objects.equals(pid, author.pid) &&
Objects.equals(affiliation, author.affiliation);
return Objects.equals(fullname, author.fullname)
&& Objects.equals(name, author.name)
&& Objects.equals(surname, author.surname)
&& Objects.equals(rank, author.rank)
&& Objects.equals(pid, author.pid)
&& Objects.equals(affiliation, author.affiliation);
}
@Override

View File

@ -26,17 +26,14 @@ public class Context implements Serializable {
@Override
public int hashCode() {
return id ==null? 0 : id.hashCode();
return id == null ? 0 : id.hashCode();
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Context other = (Context) obj;

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
public class DataInfo implements Serializable {
@ -13,7 +12,6 @@ public class DataInfo implements Serializable {
private String inferenceprovenance;
private Qualifier provenanceaction;
public Boolean getInvisible() {
return invisible;
}
@ -67,16 +65,22 @@ public class DataInfo implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
DataInfo dataInfo = (DataInfo) o;
return Objects.equals(invisible, dataInfo.invisible) &&
Objects.equals(inferred, dataInfo.inferred) &&
Objects.equals(deletedbyinference, dataInfo.deletedbyinference) &&
Objects.equals(trust, dataInfo.trust) &&
Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance) &&
Objects.equals(provenanceaction, dataInfo.provenanceaction);
return Objects.equals(invisible, dataInfo.invisible)
&& Objects.equals(inferred, dataInfo.inferred)
&& Objects.equals(deletedbyinference, dataInfo.deletedbyinference)
&& Objects.equals(trust, dataInfo.trust)
&& Objects.equals(inferenceprovenance, dataInfo.inferenceprovenance)
&& Objects.equals(provenanceaction, dataInfo.provenanceaction);
}
@Override
public int hashCode() {
return Objects.hash(invisible, inferred, deletedbyinference, trust, inferenceprovenance, provenanceaction);
return Objects.hash(
invisible,
inferred,
deletedbyinference,
trust,
inferenceprovenance,
provenanceaction);
}
}

View File

@ -80,23 +80,32 @@ public class Dataset extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Dataset.class.isAssignableFrom(e.getClass())){
if (!Dataset.class.isAssignableFrom(e.getClass())) {
return;
}
final Dataset d = (Dataset) e;
storagedate = d.getStoragedate() != null && compareTrust(this, e)<0? d.getStoragedate() : storagedate;
storagedate =
d.getStoragedate() != null && compareTrust(this, e) < 0
? d.getStoragedate()
: storagedate;
device= d.getDevice() != null && compareTrust(this, e)<0? d.getDevice() : device;
device = d.getDevice() != null && compareTrust(this, e) < 0 ? d.getDevice() : device;
size= d.getSize() != null && compareTrust(this, e)<0? d.getSize() : size;
size = d.getSize() != null && compareTrust(this, e) < 0 ? d.getSize() : size;
version= d.getVersion() != null && compareTrust(this, e)<0? d.getVersion() : version;
version = d.getVersion() != null && compareTrust(this, e) < 0 ? d.getVersion() : version;
lastmetadataupdate= d.getLastmetadataupdate() != null && compareTrust(this, e)<0? d.getLastmetadataupdate() :lastmetadataupdate;
lastmetadataupdate =
d.getLastmetadataupdate() != null && compareTrust(this, e) < 0
? d.getLastmetadataupdate()
: lastmetadataupdate;
metadataversionnumber= d.getMetadataversionnumber() != null && compareTrust(this, e)<0? d.getMetadataversionnumber() : metadataversionnumber;
metadataversionnumber =
d.getMetadataversionnumber() != null && compareTrust(this, e) < 0
? d.getMetadataversionnumber()
: metadataversionnumber;
geolocation = mergeLists(geolocation, d.getGeolocation());
@ -109,17 +118,25 @@ public class Dataset extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Dataset dataset = (Dataset) o;
return Objects.equals(storagedate, dataset.storagedate) &&
Objects.equals(device, dataset.device) &&
Objects.equals(size, dataset.size) &&
Objects.equals(version, dataset.version) &&
Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate) &&
Objects.equals(metadataversionnumber, dataset.metadataversionnumber) &&
Objects.equals(geolocation, dataset.geolocation);
return Objects.equals(storagedate, dataset.storagedate)
&& Objects.equals(device, dataset.device)
&& Objects.equals(size, dataset.size)
&& Objects.equals(version, dataset.version)
&& Objects.equals(lastmetadataupdate, dataset.lastmetadataupdate)
&& Objects.equals(metadataversionnumber, dataset.metadataversionnumber)
&& Objects.equals(geolocation, dataset.geolocation);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), storagedate, device, size, version, lastmetadataupdate, metadataversionnumber, geolocation);
return Objects.hash(
super.hashCode(),
storagedate,
device,
size,
version,
lastmetadataupdate,
metadataversionnumber,
geolocation);
}
}

View File

@ -72,7 +72,7 @@ public class Datasource extends OafEntity implements Serializable {
private Field<String> citationguidelineurl;
//{yes, no, uknown}
// {yes, no, uknown}
private Field<String> qualitymanagementkind;
private Field<String> pidsystems;
@ -367,65 +367,148 @@ public class Datasource extends OafEntity implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Datasource.class.isAssignableFrom(e.getClass())){
if (!Datasource.class.isAssignableFrom(e.getClass())) {
return;
}
Datasource d = (Datasource)e;
Datasource d = (Datasource) e;
datasourcetype = d.getDatasourcetype() != null && compareTrust(this, e)<0? d.getDatasourcetype() : datasourcetype;
openairecompatibility = d.getOpenairecompatibility() != null && compareTrust(this, e)<0? d.getOpenairecompatibility() : openairecompatibility;
officialname = d.getOfficialname() != null && compareTrust(this, e)<0? d.getOfficialname() : officialname;
englishname = d.getEnglishname() != null && compareTrust(this, e)<0? d.getEnglishname() : officialname;
websiteurl = d.getWebsiteurl() != null && compareTrust(this, e)<0? d.getWebsiteurl() : websiteurl;
logourl = d.getLogourl() != null && compareTrust(this, e)<0? d.getLogourl() : getLogourl();
contactemail = d.getContactemail() != null && compareTrust(this, e)<0? d.getContactemail() : contactemail;
namespaceprefix = d.getNamespaceprefix() != null && compareTrust(this, e)<0? d.getNamespaceprefix() : namespaceprefix;
latitude = d.getLatitude() != null && compareTrust(this, e)<0? d.getLatitude() : latitude;
longitude = d.getLongitude() != null && compareTrust(this, e)<0? d.getLongitude() : longitude;
dateofvalidation = d.getDateofvalidation() != null && compareTrust(this, e)<0? d.getDateofvalidation() : dateofvalidation;
description = d.getDescription() != null && compareTrust(this, e)<0? d.getDescription() : description;
datasourcetype =
d.getDatasourcetype() != null && compareTrust(this, e) < 0
? d.getDatasourcetype()
: datasourcetype;
openairecompatibility =
d.getOpenairecompatibility() != null && compareTrust(this, e) < 0
? d.getOpenairecompatibility()
: openairecompatibility;
officialname =
d.getOfficialname() != null && compareTrust(this, e) < 0
? d.getOfficialname()
: officialname;
englishname =
d.getEnglishname() != null && compareTrust(this, e) < 0
? d.getEnglishname()
: officialname;
websiteurl =
d.getWebsiteurl() != null && compareTrust(this, e) < 0
? d.getWebsiteurl()
: websiteurl;
logourl =
d.getLogourl() != null && compareTrust(this, e) < 0 ? d.getLogourl() : getLogourl();
contactemail =
d.getContactemail() != null && compareTrust(this, e) < 0
? d.getContactemail()
: contactemail;
namespaceprefix =
d.getNamespaceprefix() != null && compareTrust(this, e) < 0
? d.getNamespaceprefix()
: namespaceprefix;
latitude =
d.getLatitude() != null && compareTrust(this, e) < 0 ? d.getLatitude() : latitude;
longitude =
d.getLongitude() != null && compareTrust(this, e) < 0
? d.getLongitude()
: longitude;
dateofvalidation =
d.getDateofvalidation() != null && compareTrust(this, e) < 0
? d.getDateofvalidation()
: dateofvalidation;
description =
d.getDescription() != null && compareTrust(this, e) < 0
? d.getDescription()
: description;
subjects = mergeLists(subjects, d.getSubjects());
// opendoar specific fields (od*)
odnumberofitems = d.getOdnumberofitems() != null && compareTrust(this, e)<0? d.getOdnumberofitems() : odnumberofitems;
odnumberofitemsdate = d.getOdnumberofitemsdate() != null && compareTrust(this, e)<0? d.getOdnumberofitemsdate() : odnumberofitemsdate;
odpolicies = d.getOdpolicies() != null && compareTrust(this, e)<0? d.getOdpolicies() : odpolicies;
odnumberofitems =
d.getOdnumberofitems() != null && compareTrust(this, e) < 0
? d.getOdnumberofitems()
: odnumberofitems;
odnumberofitemsdate =
d.getOdnumberofitemsdate() != null && compareTrust(this, e) < 0
? d.getOdnumberofitemsdate()
: odnumberofitemsdate;
odpolicies =
d.getOdpolicies() != null && compareTrust(this, e) < 0
? d.getOdpolicies()
: odpolicies;
odlanguages = mergeLists(odlanguages, d.getOdlanguages());
odcontenttypes = mergeLists(odcontenttypes, d.getOdcontenttypes());
accessinfopackage = mergeLists(accessinfopackage, d.getAccessinfopackage());
// re3data fields
releasestartdate = d.getReleasestartdate() != null && compareTrust(this, e)<0? d.getReleasestartdate() : releasestartdate;
releaseenddate = d.getReleaseenddate() != null && compareTrust(this, e)<0? d.getReleaseenddate() : releaseenddate;
missionstatementurl = d.getMissionstatementurl() != null && compareTrust(this, e)<0? d.getMissionstatementurl() : missionstatementurl;
dataprovider = d.getDataprovider() != null && compareTrust(this, e)<0? d.getDataprovider() : dataprovider;
serviceprovider = d.getServiceprovider() != null && compareTrust(this, e)<0? d.getServiceprovider() : serviceprovider;
releasestartdate =
d.getReleasestartdate() != null && compareTrust(this, e) < 0
? d.getReleasestartdate()
: releasestartdate;
releaseenddate =
d.getReleaseenddate() != null && compareTrust(this, e) < 0
? d.getReleaseenddate()
: releaseenddate;
missionstatementurl =
d.getMissionstatementurl() != null && compareTrust(this, e) < 0
? d.getMissionstatementurl()
: missionstatementurl;
dataprovider =
d.getDataprovider() != null && compareTrust(this, e) < 0
? d.getDataprovider()
: dataprovider;
serviceprovider =
d.getServiceprovider() != null && compareTrust(this, e) < 0
? d.getServiceprovider()
: serviceprovider;
// {open, restricted or closed}
databaseaccesstype = d.getDatabaseaccesstype() != null && compareTrust(this, e)<0? d.getDatabaseaccesstype() : databaseaccesstype;
databaseaccesstype =
d.getDatabaseaccesstype() != null && compareTrust(this, e) < 0
? d.getDatabaseaccesstype()
: databaseaccesstype;
// {open, restricted or closed}
datauploadtype = d.getDatauploadtype() != null && compareTrust(this, e)<0? d.getDatauploadtype() : datauploadtype;
datauploadtype =
d.getDatauploadtype() != null && compareTrust(this, e) < 0
? d.getDatauploadtype()
: datauploadtype;
// {feeRequired, registration, other}
databaseaccessrestriction = d.getDatabaseaccessrestriction() != null && compareTrust(this, e)<0? d.getDatabaseaccessrestriction() : databaseaccessrestriction;
databaseaccessrestriction =
d.getDatabaseaccessrestriction() != null && compareTrust(this, e) < 0
? d.getDatabaseaccessrestriction()
: databaseaccessrestriction;
// {feeRequired, registration, other}
datauploadrestriction = d.getDatauploadrestriction() != null && compareTrust(this, e)<0? d.getDatauploadrestriction() : datauploadrestriction;
datauploadrestriction =
d.getDatauploadrestriction() != null && compareTrust(this, e) < 0
? d.getDatauploadrestriction()
: datauploadrestriction;
versioning = d.getVersioning() != null && compareTrust(this, e)<0? d.getVersioning() : versioning;
citationguidelineurl = d.getCitationguidelineurl() != null && compareTrust(this, e)<0? d.getCitationguidelineurl() : citationguidelineurl;
versioning =
d.getVersioning() != null && compareTrust(this, e) < 0
? d.getVersioning()
: versioning;
citationguidelineurl =
d.getCitationguidelineurl() != null && compareTrust(this, e) < 0
? d.getCitationguidelineurl()
: citationguidelineurl;
//{yes, no, unknown}
qualitymanagementkind = d.getQualitymanagementkind() != null && compareTrust(this, e)<0? d.getQualitymanagementkind() : qualitymanagementkind;
pidsystems = d.getPidsystems() != null && compareTrust(this, e)<0? d.getPidsystems() : pidsystems;
// {yes, no, unknown}
qualitymanagementkind =
d.getQualitymanagementkind() != null && compareTrust(this, e) < 0
? d.getQualitymanagementkind()
: qualitymanagementkind;
pidsystems =
d.getPidsystems() != null && compareTrust(this, e) < 0
? d.getPidsystems()
: pidsystems;
certificates = d.getCertificates() != null && compareTrust(this, e)<0? d.getCertificates() : certificates;
certificates =
d.getCertificates() != null && compareTrust(this, e) < 0
? d.getCertificates()
: certificates;
policies = mergeLists(policies, d.getPolicies());
journal = d.getJournal() != null && compareTrust(this, e)<0? d.getJournal() : journal;
journal = d.getJournal() != null && compareTrust(this, e) < 0 ? d.getJournal() : journal;
mergeOAFDataInfo(e);
}
@ -436,45 +519,81 @@ public class Datasource extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Datasource that = (Datasource) o;
return Objects.equals(datasourcetype, that.datasourcetype) &&
Objects.equals(openairecompatibility, that.openairecompatibility) &&
Objects.equals(officialname, that.officialname) &&
Objects.equals(englishname, that.englishname) &&
Objects.equals(websiteurl, that.websiteurl) &&
Objects.equals(logourl, that.logourl) &&
Objects.equals(contactemail, that.contactemail) &&
Objects.equals(namespaceprefix, that.namespaceprefix) &&
Objects.equals(latitude, that.latitude) &&
Objects.equals(longitude, that.longitude) &&
Objects.equals(dateofvalidation, that.dateofvalidation) &&
Objects.equals(description, that.description) &&
Objects.equals(subjects, that.subjects) &&
Objects.equals(odnumberofitems, that.odnumberofitems) &&
Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate) &&
Objects.equals(odpolicies, that.odpolicies) &&
Objects.equals(odlanguages, that.odlanguages) &&
Objects.equals(odcontenttypes, that.odcontenttypes) &&
Objects.equals(accessinfopackage, that.accessinfopackage) &&
Objects.equals(releasestartdate, that.releasestartdate) &&
Objects.equals(releaseenddate, that.releaseenddate) &&
Objects.equals(missionstatementurl, that.missionstatementurl) &&
Objects.equals(dataprovider, that.dataprovider) &&
Objects.equals(serviceprovider, that.serviceprovider) &&
Objects.equals(databaseaccesstype, that.databaseaccesstype) &&
Objects.equals(datauploadtype, that.datauploadtype) &&
Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction) &&
Objects.equals(datauploadrestriction, that.datauploadrestriction) &&
Objects.equals(versioning, that.versioning) &&
Objects.equals(citationguidelineurl, that.citationguidelineurl) &&
Objects.equals(qualitymanagementkind, that.qualitymanagementkind) &&
Objects.equals(pidsystems, that.pidsystems) &&
Objects.equals(certificates, that.certificates) &&
Objects.equals(policies, that.policies) &&
Objects.equals(journal, that.journal);
return Objects.equals(datasourcetype, that.datasourcetype)
&& Objects.equals(openairecompatibility, that.openairecompatibility)
&& Objects.equals(officialname, that.officialname)
&& Objects.equals(englishname, that.englishname)
&& Objects.equals(websiteurl, that.websiteurl)
&& Objects.equals(logourl, that.logourl)
&& Objects.equals(contactemail, that.contactemail)
&& Objects.equals(namespaceprefix, that.namespaceprefix)
&& Objects.equals(latitude, that.latitude)
&& Objects.equals(longitude, that.longitude)
&& Objects.equals(dateofvalidation, that.dateofvalidation)
&& Objects.equals(description, that.description)
&& Objects.equals(subjects, that.subjects)
&& Objects.equals(odnumberofitems, that.odnumberofitems)
&& Objects.equals(odnumberofitemsdate, that.odnumberofitemsdate)
&& Objects.equals(odpolicies, that.odpolicies)
&& Objects.equals(odlanguages, that.odlanguages)
&& Objects.equals(odcontenttypes, that.odcontenttypes)
&& Objects.equals(accessinfopackage, that.accessinfopackage)
&& Objects.equals(releasestartdate, that.releasestartdate)
&& Objects.equals(releaseenddate, that.releaseenddate)
&& Objects.equals(missionstatementurl, that.missionstatementurl)
&& Objects.equals(dataprovider, that.dataprovider)
&& Objects.equals(serviceprovider, that.serviceprovider)
&& Objects.equals(databaseaccesstype, that.databaseaccesstype)
&& Objects.equals(datauploadtype, that.datauploadtype)
&& Objects.equals(databaseaccessrestriction, that.databaseaccessrestriction)
&& Objects.equals(datauploadrestriction, that.datauploadrestriction)
&& Objects.equals(versioning, that.versioning)
&& Objects.equals(citationguidelineurl, that.citationguidelineurl)
&& Objects.equals(qualitymanagementkind, that.qualitymanagementkind)
&& Objects.equals(pidsystems, that.pidsystems)
&& Objects.equals(certificates, that.certificates)
&& Objects.equals(policies, that.policies)
&& Objects.equals(journal, that.journal);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), datasourcetype, openairecompatibility, officialname, englishname, websiteurl, logourl, contactemail, namespaceprefix, latitude, longitude, dateofvalidation, description, subjects, odnumberofitems, odnumberofitemsdate, odpolicies, odlanguages, odcontenttypes, accessinfopackage, releasestartdate, releaseenddate, missionstatementurl, dataprovider, serviceprovider, databaseaccesstype, datauploadtype, databaseaccessrestriction, datauploadrestriction, versioning, citationguidelineurl, qualitymanagementkind, pidsystems, certificates, policies, journal);
return Objects.hash(
super.hashCode(),
datasourcetype,
openairecompatibility,
officialname,
englishname,
websiteurl,
logourl,
contactemail,
namespaceprefix,
latitude,
longitude,
dateofvalidation,
description,
subjects,
odnumberofitems,
odnumberofitemsdate,
odpolicies,
odlanguages,
odcontenttypes,
accessinfopackage,
releasestartdate,
releaseenddate,
missionstatementurl,
dataprovider,
serviceprovider,
databaseaccesstype,
datauploadtype,
databaseaccessrestriction,
datauploadrestriction,
versioning,
citationguidelineurl,
qualitymanagementkind,
pidsystems,
certificates,
policies,
journal);
}
}

View File

@ -97,18 +97,19 @@ public class ExternalReference implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ExternalReference that = (ExternalReference) o;
return Objects.equals(sitename, that.sitename) &&
Objects.equals(label, that.label) &&
Objects.equals(url, that.url) &&
Objects.equals(description, that.description) &&
Objects.equals(qualifier, that.qualifier) &&
Objects.equals(refidentifier, that.refidentifier) &&
Objects.equals(query, that.query) &&
Objects.equals(dataInfo, that.dataInfo);
return Objects.equals(sitename, that.sitename)
&& Objects.equals(label, that.label)
&& Objects.equals(url, that.url)
&& Objects.equals(description, that.description)
&& Objects.equals(qualifier, that.qualifier)
&& Objects.equals(refidentifier, that.refidentifier)
&& Objects.equals(query, that.query)
&& Objects.equals(dataInfo, that.dataInfo);
}
@Override
public int hashCode() {
return Objects.hash(sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
return Objects.hash(
sitename, label, url, description, qualifier, refidentifier, query, dataInfo);
}
}

View File

@ -60,11 +60,11 @@ public class ExtraInfo implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
ExtraInfo extraInfo = (ExtraInfo) o;
return Objects.equals(name, extraInfo.name) &&
Objects.equals(typology, extraInfo.typology) &&
Objects.equals(provenance, extraInfo.provenance) &&
Objects.equals(trust, extraInfo.trust) &&
Objects.equals(value, extraInfo.value);
return Objects.equals(name, extraInfo.name)
&& Objects.equals(typology, extraInfo.typology)
&& Objects.equals(provenance, extraInfo.provenance)
&& Objects.equals(trust, extraInfo.trust)
&& Objects.equals(value, extraInfo.value);
}
@Override

View File

@ -31,12 +31,9 @@ public class Field<T> implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Field<T> other = (Field<T>) obj;
return getValue().equals(other.getValue());
}

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class GeoLocation implements Serializable {
@ -39,13 +38,17 @@ public class GeoLocation implements Serializable {
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(point) &&
StringUtils.isBlank(box) &&
StringUtils.isBlank(place);
return StringUtils.isBlank(point) && StringUtils.isBlank(box) && StringUtils.isBlank(place);
}
public String toComparableString() {
return isBlank()?"":String.format("%s::%s%s", point != null ? point.toLowerCase() : "", box != null ? box.toLowerCase() : "", place != null ? place.toLowerCase() : "");
return isBlank()
? ""
: String.format(
"%s::%s%s",
point != null ? point.toLowerCase() : "",
box != null ? box.toLowerCase() : "",
place != null ? place.toLowerCase() : "");
}
@Override
@ -55,16 +58,12 @@ public class GeoLocation implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
GeoLocation other = (GeoLocation) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@ -22,13 +22,15 @@ public class Instance implements Serializable {
private Field<String> dateofacceptance;
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed results
// ( article | book ) processing charges. Defined here to cope with possible wrongly typed
// results
private Field<String> processingchargeamount;
// currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly typed results
// currency - alphabetic code describe in ISO-4217. Defined here to cope with possible wrongly
// typed results
private Field<String> processingchargecurrency;
private Field<String> refereed; //peer-review status
private Field<String> refereed; // peer-review status
public Field<String> getLicense() {
return license;
@ -118,12 +120,19 @@ public class Instance implements Serializable {
this.refereed = refereed;
}
public String toComparableString(){
return String.format("%s::%s::%s::%s",
hostedby != null && hostedby.getKey()!= null ? hostedby.getKey().toLowerCase() : "",
accessright!= null && accessright.getClassid()!= null ? accessright.getClassid() : "",
instancetype!= null && instancetype.getClassid()!= null ? instancetype.getClassid() : "",
url != null ? url:"");
public String toComparableString() {
return String.format(
"%s::%s::%s::%s",
hostedby != null && hostedby.getKey() != null
? hostedby.getKey().toLowerCase()
: "",
accessright != null && accessright.getClassid() != null
? accessright.getClassid()
: "",
instancetype != null && instancetype.getClassid() != null
? instancetype.getClassid()
: "",
url != null ? url : "");
}
@Override
@ -133,16 +142,12 @@ public class Instance implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Instance other = (Instance) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@ -130,22 +130,34 @@ public class Journal implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Journal journal = (Journal) o;
return Objects.equals(name, journal.name) &&
Objects.equals(issnPrinted, journal.issnPrinted) &&
Objects.equals(issnOnline, journal.issnOnline) &&
Objects.equals(issnLinking, journal.issnLinking) &&
Objects.equals(ep, journal.ep) &&
Objects.equals(iss, journal.iss) &&
Objects.equals(sp, journal.sp) &&
Objects.equals(vol, journal.vol) &&
Objects.equals(edition, journal.edition) &&
Objects.equals(conferenceplace, journal.conferenceplace) &&
Objects.equals(conferencedate, journal.conferencedate) &&
Objects.equals(dataInfo, journal.dataInfo);
return Objects.equals(name, journal.name)
&& Objects.equals(issnPrinted, journal.issnPrinted)
&& Objects.equals(issnOnline, journal.issnOnline)
&& Objects.equals(issnLinking, journal.issnLinking)
&& Objects.equals(ep, journal.ep)
&& Objects.equals(iss, journal.iss)
&& Objects.equals(sp, journal.sp)
&& Objects.equals(vol, journal.vol)
&& Objects.equals(edition, journal.edition)
&& Objects.equals(conferenceplace, journal.conferenceplace)
&& Objects.equals(conferencedate, journal.conferencedate)
&& Objects.equals(dataInfo, journal.dataInfo);
}
@Override
public int hashCode() {
return Objects.hash(name, issnPrinted, issnOnline, issnLinking, ep, iss, sp, vol, edition, conferenceplace, conferencedate, dataInfo);
return Objects.hash(
name,
issnPrinted,
issnOnline,
issnLinking,
ep,
iss,
sp,
vol,
edition,
conferenceplace,
conferencedate,
dataInfo);
}
}

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class KeyValue implements Serializable {
@ -38,7 +37,12 @@ public class KeyValue implements Serializable {
}
public String toComparableString() {
return isBlank()?"":String.format("%s::%s", key != null ? key.toLowerCase() : "", value != null ? value.toLowerCase() : "");
return isBlank()
? ""
: String.format(
"%s::%s",
key != null ? key.toLowerCase() : "",
value != null ? value.toLowerCase() : "");
}
@JsonIgnore
@ -53,12 +57,9 @@ public class KeyValue implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
KeyValue other = (KeyValue) obj;

View File

@ -25,24 +25,18 @@ public abstract class Oaf implements Serializable {
this.lastupdatetimestamp = lastupdatetimestamp;
}
public void mergeOAFDataInfo(Oaf e) {
if (e.getDataInfo()!= null && compareTrust(this,e)<0)
dataInfo = e.getDataInfo();
if (e.getDataInfo() != null && compareTrust(this, e) < 0) dataInfo = e.getDataInfo();
}
protected String extractTrust(Oaf e) {
if (e == null || e.getDataInfo()== null || e.getDataInfo().getTrust()== null)
if (e == null || e.getDataInfo() == null || e.getDataInfo().getTrust() == null)
return "0.0";
return e.getDataInfo().getTrust();
}
protected int compareTrust(Oaf a, Oaf b) {
return extractTrust(a).compareTo(extractTrust(b));
}
@Override
@ -50,8 +44,8 @@ public abstract class Oaf implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Oaf oaf = (Oaf) o;
return Objects.equals(dataInfo, oaf.dataInfo) &&
Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
return Objects.equals(dataInfo, oaf.dataInfo)
&& Objects.equals(lastupdatetimestamp, oaf.lastupdatetimestamp);
}
@Override

View File

@ -86,11 +86,9 @@ public abstract class OafEntity extends Oaf implements Serializable {
this.oaiprovenance = oaiprovenance;
}
public void mergeFrom(OafEntity e) {
if (e == null)
return;
if (e == null) return;
originalId = mergeLists(originalId, e.getOriginalId());
@ -108,12 +106,15 @@ public abstract class OafEntity extends Oaf implements Serializable {
if (e.getOaiprovenance() != null && compareTrust(this, e) < 0)
oaiprovenance = e.getOaiprovenance();
}
protected <T> List<T> mergeLists(final List<T>... lists) {
return Arrays.stream(lists).filter(Objects::nonNull).flatMap(List::stream).distinct().collect(Collectors.toList());
return Arrays.stream(lists)
.filter(Objects::nonNull)
.flatMap(List::stream)
.distinct()
.collect(Collectors.toList());
}
@Override
@ -122,18 +123,27 @@ public abstract class OafEntity extends Oaf implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
OafEntity oafEntity = (OafEntity) o;
return Objects.equals(id, oafEntity.id) &&
Objects.equals(originalId, oafEntity.originalId) &&
Objects.equals(collectedfrom, oafEntity.collectedfrom) &&
Objects.equals(pid, oafEntity.pid) &&
Objects.equals(dateofcollection, oafEntity.dateofcollection) &&
Objects.equals(dateoftransformation, oafEntity.dateoftransformation) &&
Objects.equals(extraInfo, oafEntity.extraInfo) &&
Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
return Objects.equals(id, oafEntity.id)
&& Objects.equals(originalId, oafEntity.originalId)
&& Objects.equals(collectedfrom, oafEntity.collectedfrom)
&& Objects.equals(pid, oafEntity.pid)
&& Objects.equals(dateofcollection, oafEntity.dateofcollection)
&& Objects.equals(dateoftransformation, oafEntity.dateoftransformation)
&& Objects.equals(extraInfo, oafEntity.extraInfo)
&& Objects.equals(oaiprovenance, oafEntity.oaiprovenance);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), id, originalId, collectedfrom, pid, dateofcollection, dateoftransformation, extraInfo, oaiprovenance);
return Objects.hash(
super.hashCode(),
id,
originalId,
collectedfrom,
pid,
dateofcollection,
dateoftransformation,
extraInfo,
oaiprovenance);
}
}

View File

@ -122,7 +122,8 @@ public class Organization extends OafEntity implements Serializable {
return ecinternationalorganizationeurinterests;
}
public void setEcinternationalorganizationeurinterests(Field<String> ecinternationalorganizationeurinterests) {
public void setEcinternationalorganizationeurinterests(
Field<String> ecinternationalorganizationeurinterests) {
this.ecinternationalorganizationeurinterests = ecinternationalorganizationeurinterests;
}
@ -166,32 +167,70 @@ public class Organization extends OafEntity implements Serializable {
this.country = country;
}
@Override
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Organization.class.isAssignableFrom(e.getClass())){
if (!Organization.class.isAssignableFrom(e.getClass())) {
return;
}
final Organization o = (Organization) e;
legalshortname = o.getLegalshortname() != null && compareTrust(this, e)<0? o.getLegalshortname() : legalshortname;
legalname = o.getLegalname() != null && compareTrust(this, e)<0 ? o.getLegalname() : legalname;
legalshortname =
o.getLegalshortname() != null && compareTrust(this, e) < 0
? o.getLegalshortname()
: legalshortname;
legalname =
o.getLegalname() != null && compareTrust(this, e) < 0
? o.getLegalname()
: legalname;
alternativeNames = mergeLists(o.getAlternativeNames(), alternativeNames);
websiteurl = o.getWebsiteurl() != null && compareTrust(this, e)<0? o.getWebsiteurl() : websiteurl;
logourl = o.getLogourl() != null && compareTrust(this, e)<0? o.getLogourl() : logourl;
eclegalbody = o.getEclegalbody() != null && compareTrust(this, e)<0? o.getEclegalbody() : eclegalbody;
eclegalperson = o.getEclegalperson() != null && compareTrust(this, e)<0? o.getEclegalperson() : eclegalperson;
ecnonprofit = o.getEcnonprofit() != null && compareTrust(this, e)<0? o.getEcnonprofit() : ecnonprofit;
ecresearchorganization = o.getEcresearchorganization() != null && compareTrust(this, e)<0? o.getEcresearchorganization() : ecresearchorganization;
echighereducation = o.getEchighereducation() != null && compareTrust(this, e)<0? o.getEchighereducation() : echighereducation;
ecinternationalorganizationeurinterests = o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e)<0? o.getEcinternationalorganizationeurinterests() : ecinternationalorganizationeurinterests;
ecinternationalorganization = o.getEcinternationalorganization() != null && compareTrust(this, e)<0? o.getEcinternationalorganization() : ecinternationalorganization;
ecenterprise = o.getEcenterprise() != null && compareTrust(this, e)<0? o.getEcenterprise() :ecenterprise;
ecsmevalidated = o.getEcsmevalidated() != null && compareTrust(this, e)<0? o.getEcsmevalidated() :ecsmevalidated;
ecnutscode = o.getEcnutscode() != null && compareTrust(this, e)<0? o.getEcnutscode() :ecnutscode;
country = o.getCountry() != null && compareTrust(this, e)<0 ? o.getCountry() :country;
websiteurl =
o.getWebsiteurl() != null && compareTrust(this, e) < 0
? o.getWebsiteurl()
: websiteurl;
logourl = o.getLogourl() != null && compareTrust(this, e) < 0 ? o.getLogourl() : logourl;
eclegalbody =
o.getEclegalbody() != null && compareTrust(this, e) < 0
? o.getEclegalbody()
: eclegalbody;
eclegalperson =
o.getEclegalperson() != null && compareTrust(this, e) < 0
? o.getEclegalperson()
: eclegalperson;
ecnonprofit =
o.getEcnonprofit() != null && compareTrust(this, e) < 0
? o.getEcnonprofit()
: ecnonprofit;
ecresearchorganization =
o.getEcresearchorganization() != null && compareTrust(this, e) < 0
? o.getEcresearchorganization()
: ecresearchorganization;
echighereducation =
o.getEchighereducation() != null && compareTrust(this, e) < 0
? o.getEchighereducation()
: echighereducation;
ecinternationalorganizationeurinterests =
o.getEcinternationalorganizationeurinterests() != null && compareTrust(this, e) < 0
? o.getEcinternationalorganizationeurinterests()
: ecinternationalorganizationeurinterests;
ecinternationalorganization =
o.getEcinternationalorganization() != null && compareTrust(this, e) < 0
? o.getEcinternationalorganization()
: ecinternationalorganization;
ecenterprise =
o.getEcenterprise() != null && compareTrust(this, e) < 0
? o.getEcenterprise()
: ecenterprise;
ecsmevalidated =
o.getEcsmevalidated() != null && compareTrust(this, e) < 0
? o.getEcsmevalidated()
: ecsmevalidated;
ecnutscode =
o.getEcnutscode() != null && compareTrust(this, e) < 0
? o.getEcnutscode()
: ecnutscode;
country = o.getCountry() != null && compareTrust(this, e) < 0 ? o.getCountry() : country;
mergeOAFDataInfo(o);
}
@ -201,26 +240,45 @@ public class Organization extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Organization that = (Organization) o;
return Objects.equals(legalshortname, that.legalshortname) &&
Objects.equals(legalname, that.legalname) &&
Objects.equals(alternativeNames, that.alternativeNames) &&
Objects.equals(websiteurl, that.websiteurl) &&
Objects.equals(logourl, that.logourl) &&
Objects.equals(eclegalbody, that.eclegalbody) &&
Objects.equals(eclegalperson, that.eclegalperson) &&
Objects.equals(ecnonprofit, that.ecnonprofit) &&
Objects.equals(ecresearchorganization, that.ecresearchorganization) &&
Objects.equals(echighereducation, that.echighereducation) &&
Objects.equals(ecinternationalorganizationeurinterests, that.ecinternationalorganizationeurinterests) &&
Objects.equals(ecinternationalorganization, that.ecinternationalorganization) &&
Objects.equals(ecenterprise, that.ecenterprise) &&
Objects.equals(ecsmevalidated, that.ecsmevalidated) &&
Objects.equals(ecnutscode, that.ecnutscode) &&
Objects.equals(country, that.country);
return Objects.equals(legalshortname, that.legalshortname)
&& Objects.equals(legalname, that.legalname)
&& Objects.equals(alternativeNames, that.alternativeNames)
&& Objects.equals(websiteurl, that.websiteurl)
&& Objects.equals(logourl, that.logourl)
&& Objects.equals(eclegalbody, that.eclegalbody)
&& Objects.equals(eclegalperson, that.eclegalperson)
&& Objects.equals(ecnonprofit, that.ecnonprofit)
&& Objects.equals(ecresearchorganization, that.ecresearchorganization)
&& Objects.equals(echighereducation, that.echighereducation)
&& Objects.equals(
ecinternationalorganizationeurinterests,
that.ecinternationalorganizationeurinterests)
&& Objects.equals(ecinternationalorganization, that.ecinternationalorganization)
&& Objects.equals(ecenterprise, that.ecenterprise)
&& Objects.equals(ecsmevalidated, that.ecsmevalidated)
&& Objects.equals(ecnutscode, that.ecnutscode)
&& Objects.equals(country, that.country);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), legalshortname, legalname, alternativeNames, websiteurl, logourl, eclegalbody, eclegalperson, ecnonprofit, ecresearchorganization, echighereducation, ecinternationalorganizationeurinterests, ecinternationalorganization, ecenterprise, ecsmevalidated, ecnutscode, country);
return Objects.hash(
super.hashCode(),
legalshortname,
legalname,
alternativeNames,
websiteurl,
logourl,
eclegalbody,
eclegalperson,
ecnonprofit,
ecresearchorganization,
echighereducation,
ecinternationalorganizationeurinterests,
ecinternationalorganization,
ecenterprise,
ecsmevalidated,
ecnutscode,
country);
}
}

View File

@ -70,16 +70,17 @@ public class OriginDescription implements Serializable {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
OriginDescription that = (OriginDescription) o;
return Objects.equals(harvestDate, that.harvestDate) &&
Objects.equals(altered, that.altered) &&
Objects.equals(baseURL, that.baseURL) &&
Objects.equals(identifier, that.identifier) &&
Objects.equals(datestamp, that.datestamp) &&
Objects.equals(metadataNamespace, that.metadataNamespace);
return Objects.equals(harvestDate, that.harvestDate)
&& Objects.equals(altered, that.altered)
&& Objects.equals(baseURL, that.baseURL)
&& Objects.equals(identifier, that.identifier)
&& Objects.equals(datestamp, that.datestamp)
&& Objects.equals(metadataNamespace, that.metadataNamespace);
}
@Override
public int hashCode() {
return Objects.hash(harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
return Objects.hash(
harvestDate, altered, baseURL, identifier, datestamp, metadataNamespace);
}
}

View File

@ -40,11 +40,11 @@ public class OtherResearchProduct extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())){
if (!OtherResearchProduct.class.isAssignableFrom(e.getClass())) {
return;
}
OtherResearchProduct o = (OtherResearchProduct)e;
OtherResearchProduct o = (OtherResearchProduct) e;
contactperson = mergeLists(contactperson, o.getContactperson());
contactgroup = mergeLists(contactgroup, o.getContactgroup());
@ -58,9 +58,9 @@ public class OtherResearchProduct extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
OtherResearchProduct that = (OtherResearchProduct) o;
return Objects.equals(contactperson, that.contactperson) &&
Objects.equals(contactgroup, that.contactgroup) &&
Objects.equals(tool, that.tool);
return Objects.equals(contactperson, that.contactperson)
&& Objects.equals(contactgroup, that.contactgroup)
&& Objects.equals(tool, that.tool);
}
@Override

View File

@ -266,43 +266,90 @@ public class Project extends OafEntity implements Serializable {
this.fundedamount = fundedamount;
}
@Override
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Project.class.isAssignableFrom(e.getClass())){
if (!Project.class.isAssignableFrom(e.getClass())) {
return;
}
Project p = (Project)e;
Project p = (Project) e;
websiteurl= p.getWebsiteurl()!= null && compareTrust(this,e)<0?p.getWebsiteurl():websiteurl;
code= p.getCode()!=null && compareTrust(this,e)<0?p.getCode():code;
acronym= p.getAcronym()!= null && compareTrust(this,e)<0?p.getAcronym():acronym;
title= p.getTitle()!= null && compareTrust(this,e)<0?p.getTitle():title;
startdate= p.getStartdate()!=null && compareTrust(this,e)<0?p.getStartdate():startdate;
enddate= p.getEnddate()!=null && compareTrust(this,e)<0?p.getEnddate():enddate;
callidentifier= p.getCallidentifier()!=null && compareTrust(this,e)<0?p.getCallidentifier():callidentifier;
keywords= p.getKeywords()!=null && compareTrust(this,e)<0?p.getKeywords():keywords;
duration= p.getDuration()!=null && compareTrust(this,e)<0?p.getDuration():duration;
ecsc39= p.getEcsc39()!=null && compareTrust(this,e)<0?p.getEcsc39():ecsc39;
oamandatepublications= p.getOamandatepublications()!=null && compareTrust(this,e)<0?p.getOamandatepublications():oamandatepublications;
ecarticle29_3= p.getEcarticle29_3()!=null && compareTrust(this,e)<0?p.getEcarticle29_3():ecarticle29_3;
subjects= mergeLists(subjects, p.getSubjects());
fundingtree= mergeLists(fundingtree, p.getFundingtree());
contracttype= p.getContracttype()!=null && compareTrust(this,e)<0?p.getContracttype():contracttype;
optional1= p.getOptional1()!=null && compareTrust(this,e)<0?p.getOptional1():optional1;
optional2= p.getOptional2()!=null && compareTrust(this,e)<0?p.getOptional2():optional2;
jsonextrainfo= p.getJsonextrainfo()!=null && compareTrust(this,e)<0?p.getJsonextrainfo():jsonextrainfo;
contactfullname= p.getContactfullname()!=null && compareTrust(this,e)<0?p.getContactfullname():contactfullname;
contactfax= p.getContactfax()!=null && compareTrust(this,e)<0?p.getContactfax():contactfax;
contactphone= p.getContactphone()!=null && compareTrust(this,e)<0?p.getContactphone():contactphone;
contactemail= p.getContactemail()!=null && compareTrust(this,e)<0?p.getContactemail():contactemail;
summary= p.getSummary()!=null && compareTrust(this,e)<0?p.getSummary():summary;
currency= p.getCurrency()!=null && compareTrust(this,e)<0?p.getCurrency():currency;
totalcost= p.getTotalcost()!=null && compareTrust(this,e)<0?p.getTotalcost():totalcost;
fundedamount= p.getFundedamount()!= null && compareTrust(this,e)<0?p.getFundedamount():fundedamount;
websiteurl =
p.getWebsiteurl() != null && compareTrust(this, e) < 0
? p.getWebsiteurl()
: websiteurl;
code = p.getCode() != null && compareTrust(this, e) < 0 ? p.getCode() : code;
acronym = p.getAcronym() != null && compareTrust(this, e) < 0 ? p.getAcronym() : acronym;
title = p.getTitle() != null && compareTrust(this, e) < 0 ? p.getTitle() : title;
startdate =
p.getStartdate() != null && compareTrust(this, e) < 0
? p.getStartdate()
: startdate;
enddate = p.getEnddate() != null && compareTrust(this, e) < 0 ? p.getEnddate() : enddate;
callidentifier =
p.getCallidentifier() != null && compareTrust(this, e) < 0
? p.getCallidentifier()
: callidentifier;
keywords =
p.getKeywords() != null && compareTrust(this, e) < 0 ? p.getKeywords() : keywords;
duration =
p.getDuration() != null && compareTrust(this, e) < 0 ? p.getDuration() : duration;
ecsc39 = p.getEcsc39() != null && compareTrust(this, e) < 0 ? p.getEcsc39() : ecsc39;
oamandatepublications =
p.getOamandatepublications() != null && compareTrust(this, e) < 0
? p.getOamandatepublications()
: oamandatepublications;
ecarticle29_3 =
p.getEcarticle29_3() != null && compareTrust(this, e) < 0
? p.getEcarticle29_3()
: ecarticle29_3;
subjects = mergeLists(subjects, p.getSubjects());
fundingtree = mergeLists(fundingtree, p.getFundingtree());
contracttype =
p.getContracttype() != null && compareTrust(this, e) < 0
? p.getContracttype()
: contracttype;
optional1 =
p.getOptional1() != null && compareTrust(this, e) < 0
? p.getOptional1()
: optional1;
optional2 =
p.getOptional2() != null && compareTrust(this, e) < 0
? p.getOptional2()
: optional2;
jsonextrainfo =
p.getJsonextrainfo() != null && compareTrust(this, e) < 0
? p.getJsonextrainfo()
: jsonextrainfo;
contactfullname =
p.getContactfullname() != null && compareTrust(this, e) < 0
? p.getContactfullname()
: contactfullname;
contactfax =
p.getContactfax() != null && compareTrust(this, e) < 0
? p.getContactfax()
: contactfax;
contactphone =
p.getContactphone() != null && compareTrust(this, e) < 0
? p.getContactphone()
: contactphone;
contactemail =
p.getContactemail() != null && compareTrust(this, e) < 0
? p.getContactemail()
: contactemail;
summary = p.getSummary() != null && compareTrust(this, e) < 0 ? p.getSummary() : summary;
currency =
p.getCurrency() != null && compareTrust(this, e) < 0 ? p.getCurrency() : currency;
totalcost =
p.getTotalcost() != null && compareTrust(this, e) < 0
? p.getTotalcost()
: totalcost;
fundedamount =
p.getFundedamount() != null && compareTrust(this, e) < 0
? p.getFundedamount()
: fundedamount;
mergeOAFDataInfo(e);
}
@ -312,36 +359,63 @@ public class Project extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Project project = (Project) o;
return Objects.equals(websiteurl, project.websiteurl) &&
Objects.equals(code, project.code) &&
Objects.equals(acronym, project.acronym) &&
Objects.equals(title, project.title) &&
Objects.equals(startdate, project.startdate) &&
Objects.equals(enddate, project.enddate) &&
Objects.equals(callidentifier, project.callidentifier) &&
Objects.equals(keywords, project.keywords) &&
Objects.equals(duration, project.duration) &&
Objects.equals(ecsc39, project.ecsc39) &&
Objects.equals(oamandatepublications, project.oamandatepublications) &&
Objects.equals(ecarticle29_3, project.ecarticle29_3) &&
Objects.equals(subjects, project.subjects) &&
Objects.equals(fundingtree, project.fundingtree) &&
Objects.equals(contracttype, project.contracttype) &&
Objects.equals(optional1, project.optional1) &&
Objects.equals(optional2, project.optional2) &&
Objects.equals(jsonextrainfo, project.jsonextrainfo) &&
Objects.equals(contactfullname, project.contactfullname) &&
Objects.equals(contactfax, project.contactfax) &&
Objects.equals(contactphone, project.contactphone) &&
Objects.equals(contactemail, project.contactemail) &&
Objects.equals(summary, project.summary) &&
Objects.equals(currency, project.currency) &&
Objects.equals(totalcost, project.totalcost) &&
Objects.equals(fundedamount, project.fundedamount);
return Objects.equals(websiteurl, project.websiteurl)
&& Objects.equals(code, project.code)
&& Objects.equals(acronym, project.acronym)
&& Objects.equals(title, project.title)
&& Objects.equals(startdate, project.startdate)
&& Objects.equals(enddate, project.enddate)
&& Objects.equals(callidentifier, project.callidentifier)
&& Objects.equals(keywords, project.keywords)
&& Objects.equals(duration, project.duration)
&& Objects.equals(ecsc39, project.ecsc39)
&& Objects.equals(oamandatepublications, project.oamandatepublications)
&& Objects.equals(ecarticle29_3, project.ecarticle29_3)
&& Objects.equals(subjects, project.subjects)
&& Objects.equals(fundingtree, project.fundingtree)
&& Objects.equals(contracttype, project.contracttype)
&& Objects.equals(optional1, project.optional1)
&& Objects.equals(optional2, project.optional2)
&& Objects.equals(jsonextrainfo, project.jsonextrainfo)
&& Objects.equals(contactfullname, project.contactfullname)
&& Objects.equals(contactfax, project.contactfax)
&& Objects.equals(contactphone, project.contactphone)
&& Objects.equals(contactemail, project.contactemail)
&& Objects.equals(summary, project.summary)
&& Objects.equals(currency, project.currency)
&& Objects.equals(totalcost, project.totalcost)
&& Objects.equals(fundedamount, project.fundedamount);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), websiteurl, code, acronym, title, startdate, enddate, callidentifier, keywords, duration, ecsc39, oamandatepublications, ecarticle29_3, subjects, fundingtree, contracttype, optional1, optional2, jsonextrainfo, contactfullname, contactfax, contactphone, contactemail, summary, currency, totalcost, fundedamount);
return Objects.hash(
super.hashCode(),
websiteurl,
code,
acronym,
title,
startdate,
enddate,
callidentifier,
keywords,
duration,
ecsc39,
oamandatepublications,
ecarticle29_3,
subjects,
fundingtree,
contracttype,
optional1,
optional2,
jsonextrainfo,
contactfullname,
contactfax,
contactphone,
contactemail,
summary,
currency,
totalcost,
fundedamount);
}
}

View File

@ -20,14 +20,13 @@ public class Publication extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Publication.class.isAssignableFrom(e.getClass())){
if (!Publication.class.isAssignableFrom(e.getClass())) {
return;
}
Publication p = (Publication) e;
if (p.getJournal() != null && compareTrust(this, e)<0)
journal = p.getJournal();
if (p.getJournal() != null && compareTrust(this, e) < 0) journal = p.getJournal();
mergeOAFDataInfo(e);
}

View File

@ -1,9 +1,8 @@
package eu.dnetlib.dhp.schema.oaf;
import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import org.apache.commons.lang3.StringUtils;
public class Qualifier implements Serializable {
@ -45,7 +44,10 @@ public class Qualifier implements Serializable {
}
public String toComparableString() {
return isBlank()?"": String.format("%s::%s::%s::%s",
return isBlank()
? ""
: String.format(
"%s::%s::%s::%s",
classid != null ? classid : "",
classname != null ? classname : "",
schemeid != null ? schemeid : "",
@ -54,11 +56,12 @@ public class Qualifier implements Serializable {
@JsonIgnore
public boolean isBlank() {
return StringUtils.isBlank(classid) &&
StringUtils.isBlank(classname) &&
StringUtils.isBlank(schemeid) &&
StringUtils.isBlank(schemename);
return StringUtils.isBlank(classid)
&& StringUtils.isBlank(classname)
&& StringUtils.isBlank(schemeid)
&& StringUtils.isBlank(schemename);
}
@Override
public int hashCode() {
return toComparableString().hashCode();
@ -66,16 +69,12 @@ public class Qualifier implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
Qualifier other = (Qualifier) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.schema.oaf;
import static com.google.common.base.Preconditions.checkArgument;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.google.common.base.Preconditions.checkArgument;
public class Relation extends Oaf {
private String relType;
@ -70,16 +70,21 @@ public class Relation extends Oaf {
public void mergeFrom(final Relation r) {
checkArgument(Objects.equals(getSource(), r.getSource()),"source ids must be equal");
checkArgument(Objects.equals(getTarget(), r.getTarget()),"target ids must be equal");
checkArgument(Objects.equals(getRelType(), r.getRelType()),"relType(s) must be equal");
checkArgument(Objects.equals(getSubRelType(), r.getSubRelType()),"subRelType(s) must be equal");
checkArgument(Objects.equals(getRelClass(), r.getRelClass()),"relClass(es) must be equal");
checkArgument(Objects.equals(getSource(), r.getSource()), "source ids must be equal");
checkArgument(Objects.equals(getTarget(), r.getTarget()), "target ids must be equal");
checkArgument(Objects.equals(getRelType(), r.getRelType()), "relType(s) must be equal");
checkArgument(
Objects.equals(getSubRelType(), r.getSubRelType()), "subRelType(s) must be equal");
checkArgument(Objects.equals(getRelClass(), r.getRelClass()), "relClass(es) must be equal");
setCollectedFrom(
Stream
.concat(Optional.ofNullable(getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()),
Optional.ofNullable(r.getCollectedFrom()).map(Collection::stream).orElse(Stream.empty()))
Stream.concat(
Optional.ofNullable(getCollectedFrom())
.map(Collection::stream)
.orElse(Stream.empty()),
Optional.ofNullable(r.getCollectedFrom())
.map(Collection::stream)
.orElse(Stream.empty()))
.distinct() // relies on KeyValue.equals
.collect(Collectors.toList()));
}
@ -89,16 +94,15 @@ public class Relation extends Oaf {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Relation relation = (Relation) o;
return relType.equals(relation.relType) &&
subRelType.equals(relation.subRelType) &&
relClass.equals(relation.relClass) &&
source.equals(relation.source) &&
target.equals(relation.target);
return relType.equals(relation.relType)
&& subRelType.equals(relation.subRelType)
&& relClass.equals(relation.relClass)
&& source.equals(relation.source)
&& target.equals(relation.target);
}
@Override
public int hashCode() {
return Objects.hash(relType, subRelType, relClass, source, target, collectedFrom);
}
}

View File

@ -223,7 +223,7 @@ public class Result extends OafEntity implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Result.class.isAssignableFrom(e.getClass())){
if (!Result.class.isAssignableFrom(e.getClass())) {
return;
}
@ -231,11 +231,9 @@ public class Result extends OafEntity implements Serializable {
instance = mergeLists(instance, r.getInstance());
if (r.getResulttype() != null && compareTrust(this, r) < 0)
resulttype = r.getResulttype();
if (r.getResulttype() != null && compareTrust(this, r) < 0) resulttype = r.getResulttype();
if (r.getLanguage() != null && compareTrust(this, r) < 0)
language = r.getLanguage();
if (r.getLanguage() != null && compareTrust(this, r) < 0) language = r.getLanguage();
country = mergeLists(country, r.getCountry());
@ -247,8 +245,7 @@ public class Result extends OafEntity implements Serializable {
description = longestLists(description, r.getDescription());
if (r.getPublisher() != null && compareTrust(this, r) < 0)
publisher = r.getPublisher();
if (r.getPublisher() != null && compareTrust(this, r) < 0) publisher = r.getPublisher();
if (r.getEmbargoenddate() != null && compareTrust(this, r) < 0)
embargoenddate = r.getEmbargoenddate();
@ -261,8 +258,7 @@ public class Result extends OafEntity implements Serializable {
contributor = mergeLists(contributor, r.getContributor());
if (r.getResourcetype() != null)
resourcetype = r.getResourcetype();
if (r.getResourcetype() != null) resourcetype = r.getResourcetype();
coverage = mergeLists(coverage, r.getCoverage());
@ -271,13 +267,21 @@ public class Result extends OafEntity implements Serializable {
externalReference = mergeLists(externalReference, r.getExternalReference());
}
private List<Field<String>> longestLists(List<Field<String>> a, List<Field<String>> b) {
if (a == null || b == null)
return a == null ? b : a;
if (a == null || b == null) return a == null ? b : a;
if (a.size() == b.size()) {
int msa = a.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0);
int msb = b.stream().filter(i -> i.getValue() != null).map(i -> i.getValue().length()).max(Comparator.naturalOrder()).orElse(0);
int msa =
a.stream()
.filter(i -> i.getValue() != null)
.map(i -> i.getValue().length())
.max(Comparator.naturalOrder())
.orElse(0);
int msb =
b.stream()
.filter(i -> i.getValue() != null)
.map(i -> i.getValue().length())
.max(Comparator.naturalOrder())
.orElse(0);
return msa > msb ? a : b;
}
return a.size() > b.size() ? a : b;
@ -289,31 +293,53 @@ public class Result extends OafEntity implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Result result = (Result) o;
return Objects.equals(author, result.author) &&
Objects.equals(resulttype, result.resulttype) &&
Objects.equals(language, result.language) &&
Objects.equals(country, result.country) &&
Objects.equals(subject, result.subject) &&
Objects.equals(title, result.title) &&
Objects.equals(relevantdate, result.relevantdate) &&
Objects.equals(description, result.description) &&
Objects.equals(dateofacceptance, result.dateofacceptance) &&
Objects.equals(publisher, result.publisher) &&
Objects.equals(embargoenddate, result.embargoenddate) &&
Objects.equals(source, result.source) &&
Objects.equals(fulltext, result.fulltext) &&
Objects.equals(format, result.format) &&
Objects.equals(contributor, result.contributor) &&
Objects.equals(resourcetype, result.resourcetype) &&
Objects.equals(coverage, result.coverage) &&
Objects.equals(bestaccessright, result.bestaccessright) &&
Objects.equals(context, result.context) &&
Objects.equals(externalReference, result.externalReference) &&
Objects.equals(instance, result.instance);
return Objects.equals(author, result.author)
&& Objects.equals(resulttype, result.resulttype)
&& Objects.equals(language, result.language)
&& Objects.equals(country, result.country)
&& Objects.equals(subject, result.subject)
&& Objects.equals(title, result.title)
&& Objects.equals(relevantdate, result.relevantdate)
&& Objects.equals(description, result.description)
&& Objects.equals(dateofacceptance, result.dateofacceptance)
&& Objects.equals(publisher, result.publisher)
&& Objects.equals(embargoenddate, result.embargoenddate)
&& Objects.equals(source, result.source)
&& Objects.equals(fulltext, result.fulltext)
&& Objects.equals(format, result.format)
&& Objects.equals(contributor, result.contributor)
&& Objects.equals(resourcetype, result.resourcetype)
&& Objects.equals(coverage, result.coverage)
&& Objects.equals(bestaccessright, result.bestaccessright)
&& Objects.equals(context, result.context)
&& Objects.equals(externalReference, result.externalReference)
&& Objects.equals(instance, result.instance);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), author, resulttype, language, country, subject, title, relevantdate, description, dateofacceptance, publisher, embargoenddate, source, fulltext, format, contributor, resourcetype, coverage, bestaccessright, context, externalReference, instance);
return Objects.hash(
super.hashCode(),
author,
resulttype,
language,
country,
subject,
title,
relevantdate,
description,
dateofacceptance,
publisher,
embargoenddate,
source,
fulltext,
format,
contributor,
resourcetype,
coverage,
bestaccessright,
context,
externalReference,
instance);
}
}

View File

@ -50,7 +50,7 @@ public class Software extends Result implements Serializable {
public void mergeFrom(OafEntity e) {
super.mergeFrom(e);
if (!Software.class.isAssignableFrom(e.getClass())){
if (!Software.class.isAssignableFrom(e.getClass())) {
return;
}
@ -59,9 +59,15 @@ public class Software extends Result implements Serializable {
license = mergeLists(license, s.getLicense());
codeRepositoryUrl = s.getCodeRepositoryUrl()!= null && compareTrust(this, s)<0?s.getCodeRepositoryUrl():codeRepositoryUrl;
codeRepositoryUrl =
s.getCodeRepositoryUrl() != null && compareTrust(this, s) < 0
? s.getCodeRepositoryUrl()
: codeRepositoryUrl;
programmingLanguage= s.getProgrammingLanguage()!= null && compareTrust(this, s)<0?s.getProgrammingLanguage():programmingLanguage;
programmingLanguage =
s.getProgrammingLanguage() != null && compareTrust(this, s) < 0
? s.getProgrammingLanguage()
: programmingLanguage;
mergeOAFDataInfo(e);
}
@ -72,14 +78,19 @@ public class Software extends Result implements Serializable {
if (o == null || getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
Software software = (Software) o;
return Objects.equals(documentationUrl, software.documentationUrl) &&
Objects.equals(license, software.license) &&
Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl) &&
Objects.equals(programmingLanguage, software.programmingLanguage);
return Objects.equals(documentationUrl, software.documentationUrl)
&& Objects.equals(license, software.license)
&& Objects.equals(codeRepositoryUrl, software.codeRepositoryUrl)
&& Objects.equals(programmingLanguage, software.programmingLanguage);
}
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), documentationUrl, license, codeRepositoryUrl, programmingLanguage);
return Objects.hash(
super.hashCode(),
documentationUrl,
license,
codeRepositoryUrl,
programmingLanguage);
}
}

View File

@ -34,7 +34,7 @@ public class StructuredProperty implements Serializable {
this.dataInfo = dataInfo;
}
public String toComparableString(){
public String toComparableString() {
return value != null ? value.toLowerCase() : "";
}
@ -45,16 +45,12 @@ public class StructuredProperty implements Serializable {
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
StructuredProperty other = (StructuredProperty) obj;
return toComparableString()
.equals(other.toComparableString());
return toComparableString().equals(other.toComparableString());
}
}

View File

@ -2,12 +2,11 @@ package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
public class DLIDataset extends Dataset {
@ -47,17 +46,23 @@ public class DLIDataset extends Dataset {
DLIDataset p = (DLIDataset) e;
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
completionStatus = p.completionStatus;
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
}
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null)
a.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
a.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
@ -65,9 +70,15 @@ public class DLIDataset extends Dataset {
result.put(p.getId(), p);
});
if (b != null)
b.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
b.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}

View File

@ -2,9 +2,9 @@ package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Publication;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
public class DLIPublication extends Publication implements Serializable {
@ -44,17 +44,23 @@ public class DLIPublication extends Publication implements Serializable {
DLIPublication p = (DLIPublication) e;
if (StringUtils.isBlank(completionStatus) && StringUtils.isNotBlank(p.completionStatus))
completionStatus = p.completionStatus;
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
}
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null)
a.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
a.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
@ -62,9 +68,15 @@ public class DLIPublication extends Publication implements Serializable {
result.put(p.getId(), p);
});
if (b != null)
b.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
b.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}

View File

@ -1,15 +1,13 @@
package eu.dnetlib.dhp.schema.scholexplorer;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
public class DLIUnknown extends Oaf implements Serializable {
@ -49,7 +47,6 @@ public class DLIUnknown extends Oaf implements Serializable {
this.id = id;
}
public List<StructuredProperty> getPid() {
return pid;
}
@ -75,17 +72,23 @@ public class DLIUnknown extends Oaf implements Serializable {
}
public void mergeFrom(DLIUnknown p) {
if ("complete".equalsIgnoreCase(p.completionStatus))
completionStatus = "complete";
if ("complete".equalsIgnoreCase(p.completionStatus)) completionStatus = "complete";
dlicollectedfrom = mergeProvenance(dlicollectedfrom, p.getDlicollectedfrom());
}
private List<ProvenaceInfo> mergeProvenance(final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
private List<ProvenaceInfo> mergeProvenance(
final List<ProvenaceInfo> a, final List<ProvenaceInfo> b) {
Map<String, ProvenaceInfo> result = new HashMap<>();
if (a != null)
a.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
a.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}
@ -93,9 +96,15 @@ public class DLIUnknown extends Oaf implements Serializable {
result.put(p.getId(), p);
});
if (b != null)
b.forEach(p -> {
if (p != null && StringUtils.isNotBlank(p.getId()) && result.containsKey(p.getId())) {
if ("incomplete".equalsIgnoreCase(result.get(p.getId()).getCompletionStatus()) && StringUtils.isNotBlank(p.getCompletionStatus())) {
b.forEach(
p -> {
if (p != null
&& StringUtils.isNotBlank(p.getId())
&& result.containsKey(p.getId())) {
if ("incomplete"
.equalsIgnoreCase(
result.get(p.getId()).getCompletionStatus())
&& StringUtils.isNotBlank(p.getCompletionStatus())) {
result.put(p.getId(), p);
}

View File

@ -10,7 +10,7 @@ public class ProvenaceInfo implements Serializable {
private String completionStatus;
private String collectionMode ="collected";
private String collectionMode = "collected";
public String getId() {
return id;

View File

@ -1,18 +1,14 @@
package eu.dnetlib.dhp.schema.action;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*;
/**
* @author claudio.atzori
*/
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.oaf.Relation;
import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
/** @author claudio.atzori */
public class AtomicActionTest {
@Test
@ -36,7 +32,5 @@ public class AtomicActionTest {
assertEquals(aa1.getClazz(), aa2.getClazz());
assertEquals(aa1.getPayload(), aa2.getPayload());
}
}

View File

@ -1,14 +1,14 @@
package eu.dnetlib.dhp.schema.common;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class ModelSupportTest {
@Nested

View File

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.schema.oaf;
import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.List;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class MergeTest {
@ -19,7 +19,7 @@ public class MergeTest {
@Test
public void mergeListsTest() {
//string list merge test
// string list merge test
List<String> a = Arrays.asList("a", "b", "c", "e");
List<String> b = Arrays.asList("a", "b", "c", "d");
List<String> c = null;
@ -44,7 +44,6 @@ public class MergeTest {
assertNotNull(a.getCollectedfrom());
assertEquals(3, a.getCollectedfrom().size());
}
@Test
@ -60,7 +59,6 @@ public class MergeTest {
assertNotNull(a.getSubject());
assertEquals(3, a.getSubject().size());
}
private KeyValue setKV(final String key, final String value) {
@ -73,7 +71,8 @@ public class MergeTest {
return k;
}
private StructuredProperty setSP(final String value, final String schema, final String classname) {
private StructuredProperty setSP(
final String value, final String schema, final String classname) {
StructuredProperty s = new StructuredProperty();
s.setValue(value);
Qualifier q = new Qualifier();

View File

@ -6,48 +6,44 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import eu.dnetlib.dhp.schema.oaf.Qualifier;
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import org.junit.jupiter.api.Test;
public class DLItest {
@Test
public void testMergePublication() throws JsonProcessingException {
DLIPublication a1 = new DLIPublication();
a1.setPid(Arrays.asList( createSP("123456","pdb","dnet:pid_types")));
a1.setPid(Arrays.asList(createSP("123456", "pdb", "dnet:pid_types")));
a1.setTitle(Collections.singletonList(createSP("Un Titolo", "title", "dnetTitle")));
a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd","Zenodo","complete")));
a1.setDlicollectedfrom(Arrays.asList(createCollectedFrom("znd", "Zenodo", "complete")));
a1.setCompletionStatus("complete");
DLIPublication a = new DLIPublication();
a.setPid(Arrays.asList(createSP("10.11","doi","dnet:pid_types"), createSP("123456","pdb","dnet:pid_types")));
a.setPid(
Arrays.asList(
createSP("10.11", "doi", "dnet:pid_types"),
createSP("123456", "pdb", "dnet:pid_types")));
a.setTitle(Collections.singletonList(createSP("A Title", "title", "dnetTitle")));
a.setDlicollectedfrom(Arrays.asList(createCollectedFrom("dct","datacite","complete"),createCollectedFrom("dct","datacite","incomplete")));
a.setDlicollectedfrom(
Arrays.asList(
createCollectedFrom("dct", "datacite", "complete"),
createCollectedFrom("dct", "datacite", "incomplete")));
a.setCompletionStatus("incomplete");
a.mergeFrom(a1);
ObjectMapper mapper = new ObjectMapper();
System.out.println(mapper.writeValueAsString(a));
}
@Test
public void testDeserialization() throws IOException {
final String json ="{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
final String json =
"{\"dataInfo\":{\"invisible\":false,\"inferred\":null,\"deletedbyinference\":false,\"trust\":\"0.9\",\"inferenceprovenance\":null,\"provenanceaction\":null},\"lastupdatetimestamp\":null,\"id\":\"60|bd9352547098929a394655ad1a44a479\",\"originalId\":[\"bd9352547098929a394655ad1a44a479\"],\"collectedfrom\":[{\"key\":\"dli_________::datacite\",\"value\":\"Datasets in Datacite\",\"dataInfo\":null,\"blank\":false}],\"pid\":[{\"value\":\"10.7925/DRS1.DUCHAS_5078760\",\"qualifier\":{\"classid\":\"doi\",\"classname\":\"doi\",\"schemeid\":\"dnet:pid_types\",\"schemename\":\"dnet:pid_types\",\"blank\":false},\"dataInfo\":null}],\"dateofcollection\":\"2020-01-09T08:29:31.885Z\",\"dateoftransformation\":null,\"extraInfo\":null,\"oaiprovenance\":null,\"author\":[{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Ireland. Department of Arts, Culture, and the Gaeltacht\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"University College Dublin\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"National Folklore Foundation\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Cathail, S. Ó\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null},{\"fullname\":\"Donnell, Breda Mc\",\"name\":null,\"surname\":null,\"rank\":null,\"pid\":null,\"affiliation\":null}],\"resulttype\":null,\"language\":null,\"country\":null,\"subject\":[{\"value\":\"Recreation\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Entertainments and recreational activities\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null},{\"value\":\"Siamsaíocht agus caitheamh aimsire\",\"qualifier\":{\"classid\":\"dnet:subject\",\"classname\":\"dnet:subject\",\"schemeid\":\"unknown\",\"schemename\":\"unknown\",\"blank\":false},\"dataInfo\":null}],\"title\":[{\"value\":\"Games We Play\",\"qualifier\":null,\"dataInfo\":null}],\"relevantdate\":[{\"value\":\"1938-09-28\",\"qualifier\":{\"classid\":\"date\",\"classname\":\"date\",\"schemeid\":\"dnet::date\",\"schemename\":\"dnet::date\",\"blank\":false},\"dataInfo\":null}],\"description\":[{\"value\":\"Story collected by Breda Mc Donnell, a student at Tenure school (Tinure, Co. Louth) (no informant identified).\",\"dataInfo\":null}],\"dateofacceptance\":null,\"publisher\":{\"value\":\"University College Dublin\",\"dataInfo\":null},\"embargoenddate\":null,\"source\":null,\"fulltext\":null,\"format\":null,\"contributor\":null,\"resourcetype\":null,\"coverage\":null,\"refereed\":null,\"context\":null,\"processingchargeamount\":null,\"processingchargecurrency\":null,\"externalReference\":null,\"instance\":[],\"storagedate\":null,\"device\":null,\"size\":null,\"version\":null,\"lastmetadataupdate\":null,\"metadataversionnumber\":null,\"geolocation\":null,\"dlicollectedfrom\":[{\"id\":\"dli_________::datacite\",\"name\":\"Datasets in Datacite\",\"completionStatus\":\"complete\",\"collectionMode\":\"resolved\"}],\"completionStatus\":\"complete\"}";
ObjectMapper mapper = new ObjectMapper();
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@ -56,7 +52,8 @@ public class DLItest {
System.out.println(mapper.writeValueAsString(dliDataset));
}
private ProvenaceInfo createCollectedFrom(final String id, final String name, final String completionStatus) {
private ProvenaceInfo createCollectedFrom(
final String id, final String name, final String completionStatus) {
ProvenaceInfo p = new ProvenaceInfo();
p.setId(id);
p.setName(name);
@ -64,8 +61,8 @@ public class DLItest {
return p;
}
private StructuredProperty createSP(final String value, final String className, final String schemeName) {
private StructuredProperty createSP(
final String value, final String className, final String schemeName) {
StructuredProperty p = new StructuredProperty();
p.setValue(value);
Qualifier schema = new Qualifier();
@ -76,6 +73,4 @@ public class DLItest {
p.setQualifier(schema);
return p;
}
}

View File

@ -10,22 +10,22 @@ import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJo
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.stream.Collectors;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ISClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final Logger log =
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
@ -37,7 +37,9 @@ public class ISClient implements Serializable {
public List<String> getLatestRawsetPaths(String setIds) {
List<String> ids = Lists.newArrayList(Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
List<String> ids =
Lists.newArrayList(
Splitter.on(INPUT_ACTION_SET_ID_SEPARATOR)
.omitEmptyStrings()
.trimResults()
.split(setIds));
@ -50,8 +52,11 @@ public class ISClient implements Serializable {
private ActionManagerSet getSet(ISLookUpService isLookup, final String setId) {
final String q = "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
+ "where $x//SET/@id = '" + setId + "' return $x";
final String q =
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') "
+ "where $x//SET/@id = '"
+ setId
+ "' return $x";
try {
final String basePath = getBasePathHDFS(isLookup);
@ -62,7 +67,8 @@ public class ISClient implements Serializable {
}
}
private ActionManagerSet getActionManagerSet(final String basePath, final String profile) throws ActionManagerException {
private ActionManagerSet getActionManagerSet(final String basePath, final String profile)
throws ActionManagerException {
final SAXReader reader = new SAXReader();
final ActionManagerSet set = new ActionManagerSet();
@ -72,13 +78,19 @@ public class ISClient implements Serializable {
set.setId(doc.valueOf("//SET/@id").trim());
set.setName(doc.valueOf("//SET").trim());
set.setImpact(ImpactTypes.valueOf(doc.valueOf("//IMPACT").trim()));
set.setLatest(doc.valueOf("//RAW_SETS/LATEST/@id"), doc.valueOf("//RAW_SETS/LATEST/@creationDate"), doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
set.setLatest(
doc.valueOf("//RAW_SETS/LATEST/@id"),
doc.valueOf("//RAW_SETS/LATEST/@creationDate"),
doc.valueOf("//RAW_SETS/LATEST/@lastUpdate"));
set.setDirectory(doc.valueOf("//SET/@directory"));
final List expiredNodes = doc.selectNodes("//RAW_SETS/EXPIRED");
if (expiredNodes != null) {
for (int i = 0; i < expiredNodes.size(); i++) {
Element ex = (Element) expiredNodes.get(i);
set.addExpired(ex.attributeValue("id"), ex.attributeValue("creationDate"), ex.attributeValue("lastUpdate"));
set.addExpired(
ex.attributeValue("id"),
ex.attributeValue("creationDate"),
ex.attributeValue("lastUpdate"));
}
}
@ -100,9 +112,12 @@ public class ISClient implements Serializable {
return queryServiceProperty(isLookup, "basePath");
}
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName) throws ActionManagerException {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + "']/@value/string()";
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
throws ActionManagerException {
final String q =
"for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName
+ "']/@value/string()";
log.debug("quering for service property: " + q);
try {
final List<String> value = isLookup.quickSearchProfile(q);
@ -121,6 +136,4 @@ public class ISClient implements Serializable {
throw new ActionManagerException(msg, e);
}
}
}

View File

@ -1,9 +1,15 @@
package eu.dnetlib.dhp.actionmanager.partition;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.spark.sql.functions.*;
import eu.dnetlib.dhp.actionmanager.ISClient;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJob;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.spark.SparkConf;
@ -14,32 +20,27 @@ import org.apache.spark.sql.types.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static org.apache.spark.sql.functions.*;
/**
* Partitions given set of action sets by payload type.
*/
/** Partitions given set of action sets by payload type. */
public class PartitionActionSetsByPayloadTypeJob {
private static final Logger logger = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final Logger logger =
LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
private static final StructType KV_SCHEMA = StructType$.MODULE$.apply(
private static final StructType KV_SCHEMA =
StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply("key", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply("value", DataTypes.StringType, false, Metadata.empty())
));
StructField$.MODULE$.apply(
"key", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"value", DataTypes.StringType, false, Metadata.empty())));
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
private static final StructType ATOMIC_ACTION_SCHEMA =
StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())
));
StructField$.MODULE$.apply(
"clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"payload", DataTypes.StringType, false, Metadata.empty())));
private ISClient isClient;
@ -47,18 +48,18 @@ public class PartitionActionSetsByPayloadTypeJob {
this.isClient = new ISClient(isLookupUrl);
}
public PartitionActionSetsByPayloadTypeJob() {
}
public PartitionActionSetsByPayloadTypeJob() {}
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
String jsonConfiguration =
IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/partition/partition_action_sets_by_payload_type_input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
Boolean isSparkSessionManaged =
Optional.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
@ -72,7 +73,8 @@ public class PartitionActionSetsByPayloadTypeJob {
String isLookupUrl = parser.get("isLookupUrl");
logger.info("isLookupUrl: {}", isLookupUrl);
new PartitionActionSetsByPayloadTypeJob(isLookupUrl).run(isSparkSessionManaged, inputActionSetIds, outputPath);
new PartitionActionSetsByPayloadTypeJob(isLookupUrl)
.run(isSparkSessionManaged, inputActionSetIds, outputPath);
}
protected void run(Boolean isSparkSessionManaged, String inputActionSetIds, String outputPath) {
@ -83,36 +85,41 @@ public class PartitionActionSetsByPayloadTypeJob {
SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
runWithSparkSession(conf, isSparkSessionManaged,
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
readAndWriteActionSetsFromPaths(spark, inputActionSetPaths, outputPath);
});
}
private static void removeOutputDir(SparkSession spark,
String path) {
private static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
private static void readAndWriteActionSetsFromPaths(SparkSession spark,
List<String> inputActionSetPaths,
String outputPath) {
inputActionSetPaths
.forEach(inputActionSetPath -> {
Dataset<Row> actionDS = readActionSetFromPath(spark, inputActionSetPath);
private static void readAndWriteActionSetsFromPaths(
SparkSession spark, List<String> inputActionSetPaths, String outputPath) {
inputActionSetPaths.stream()
.filter(
path ->
HdfsSupport.exists(
path, spark.sparkContext().hadoopConfiguration()))
.forEach(
inputActionSetPath -> {
Dataset<Row> actionDS =
readActionSetFromPath(spark, inputActionSetPath);
saveActions(actionDS, outputPath);
});
}
private static Dataset<Row> readActionSetFromPath(SparkSession spark,
String path) {
private static Dataset<Row> readActionSetFromPath(SparkSession spark, String path) {
logger.info("Reading actions from path: {}", path);
JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Row> rdd = sc
.sequenceFile(path, Text.class, Text.class)
JavaRDD<Row> rdd =
sc.sequenceFile(path, Text.class, Text.class)
.map(x -> RowFactory.create(x._1().toString(), x._2().toString()));
return spark.createDataFrame(rdd, KV_SCHEMA)
@ -120,14 +127,9 @@ public class PartitionActionSetsByPayloadTypeJob {
.select(expr("atomic_action.*"));
}
private static void saveActions(Dataset<Row> actionDS,
String path) {
private static void saveActions(Dataset<Row> actionDS, String path) {
logger.info("Saving actions to path: {}", path);
actionDS
.write()
.partitionBy("clazz")
.mode(SaveMode.Append)
.parquet(path);
actionDS.write().partitionBy("clazz").mode(SaveMode.Append).parquet(path);
}
public ISClient getIsClient() {

View File

@ -1,30 +1,27 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Relation;
import java.util.function.BiFunction;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
/**
* OAF model merging support.
*/
/** OAF model merging support. */
public class MergeAndGet {
private MergeAndGet() {
}
private MergeAndGet() {}
/**
* Strategy for merging OAF model objects.
* <p>
* MERGE_FROM_AND_GET: use OAF 'mergeFrom' method
* SELECT_NEWER_AND_GET: use last update timestamp to return newer instance
*
* <p>MERGE_FROM_AND_GET: use OAF 'mergeFrom' method SELECT_NEWER_AND_GET: use last update
* timestamp to return newer instance
*/
public enum Strategy {
MERGE_FROM_AND_GET, SELECT_NEWER_AND_GET
MERGE_FROM_AND_GET,
SELECT_NEWER_AND_GET
}
/**
@ -35,7 +32,8 @@ public class MergeAndGet {
* @param <A> Action payload type
* @return BiFunction to be used to merge OAF objects
*/
public static <G extends Oaf, A extends Oaf> SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
public static <G extends Oaf, A extends Oaf>
SerializableSupplier<BiFunction<G, A, G>> functionFor(Strategy strategy) {
switch (strategy) {
case MERGE_FROM_AND_GET:
return () -> MergeAndGet::mergeFromAndGet;
@ -49,26 +47,36 @@ public class MergeAndGet {
if (isSubClass(x, Relation.class) && isSubClass(y, Relation.class)) {
((Relation) x).mergeFrom((Relation) y);
return x;
} else if (isSubClass(x, OafEntity.class) && isSubClass(y, OafEntity.class) && isSubClass(x, y)) {
} else if (isSubClass(x, OafEntity.class)
&& isSubClass(y, OafEntity.class)
&& isSubClass(x, y)) {
((OafEntity) x).mergeFrom((OafEntity) y);
return x;
}
throw new RuntimeException(String.format("MERGE_FROM_AND_GET incompatible types: %s, %s",
throw new RuntimeException(
String.format(
"MERGE_FROM_AND_GET incompatible types: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
return x;
} else if (x.getClass().equals(y.getClass()) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
} else if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
return (G) y;
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
return x;
} else if (isSubClass(x, y) && x.getLastupdatetimestamp() < y.getLastupdatetimestamp()) {
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
throw new RuntimeException(
String.format(
"SELECT_NEWER_AND_GET cannot return right type when it is not the same as left type: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
throw new RuntimeException(String.format("SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
throw new RuntimeException(
String.format(
"SELECT_NEWER_AND_GET cannot be used when left is not subtype of right: %s, %s",
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
}
}

View File

@ -1,14 +1,20 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@ -17,31 +23,23 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
/**
* Applies a given action payload file to graph table of compatible type.
*/
/** Applies a given action payload file to graph table of compatible type. */
public class PromoteActionPayloadForGraphTableJob {
private static final Logger logger = LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
private static final Logger logger =
LoggerFactory.getLogger(PromoteActionPayloadForGraphTableJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
String jsonConfiguration =
IOUtils.toString(
PromoteActionPayloadForGraphTableJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
Boolean isSparkSessionManaged =
Optional.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
logger.info("isSparkSessionManaged: {}", isSparkSessionManaged);
@ -61,11 +59,13 @@ public class PromoteActionPayloadForGraphTableJob {
String outputGraphTablePath = parser.get("outputGraphTablePath");
logger.info("outputGraphTablePath: {}", outputGraphTablePath);
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
MergeAndGet.Strategy strategy =
MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
logger.info("strategy: {}", strategy);
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
Class<? extends Oaf> actionPayloadClazz =
(Class<? extends Oaf>) Class.forName(actionPayloadClassName);
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
@ -73,10 +73,13 @@ public class PromoteActionPayloadForGraphTableJob {
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ModelSupport.getOafModelClasses());
runWithSparkSession(conf, isSparkSessionManaged,
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputGraphTablePath);
promoteActionPayloadForGraphTable(spark,
promoteActionPayloadForGraphTable(
spark,
inputGraphTablePath,
inputActionPayloadPath,
outputGraphTablePath,
@ -86,21 +89,23 @@ public class PromoteActionPayloadForGraphTableJob {
});
}
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
private static void throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) {
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s",
String msg =
String.format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
throw new RuntimeException(msg);
}
}
private static void removeOutputDir(SparkSession spark,
String path) {
private static void removeOutputDir(SparkSession spark, String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(SparkSession spark,
private static <G extends Oaf, A extends Oaf> void promoteActionPayloadForGraphTable(
SparkSession spark,
String inputGraphTablePath,
String inputActionPayloadPath,
String outputGraphTablePath,
@ -108,22 +113,26 @@ public class PromoteActionPayloadForGraphTableJob {
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
Dataset<A> actionPayloadDS =
readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
Dataset<G> result = promoteActionPayloadForGraphTable(rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
Dataset<G> result =
promoteActionPayloadForGraphTable(
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
saveGraphTable(result, outputGraphTablePath);
}
private static <G extends Oaf> Dataset<G> readGraphTable(SparkSession spark,
String path,
Class<G> rowClazz) {
private static <G extends Oaf> Dataset<G> readGraphTable(
SparkSession spark, String path, Class<G> rowClazz) {
logger.info("Reading graph table from path: {}", path);
return spark.read()
.textFile(path)
.map((MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz), Encoders.bean(rowClazz));
.map(
(MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz),
Encoders.bean(rowClazz));
/*
return spark
@ -133,33 +142,44 @@ public class PromoteActionPayloadForGraphTableJob {
*/
}
private static <A extends Oaf> Dataset<A> readActionPayload(SparkSession spark,
String path,
Class<A> actionPayloadClazz) {
private static <A extends Oaf> Dataset<A> readActionPayload(
SparkSession spark, String path, Class<A> actionPayloadClazz) {
logger.info("Reading action payload from path: {}", path);
return spark
.read()
return spark.read()
.parquet(path)
.map((MapFunction<Row, A>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"),
actionPayloadClazz), Encoders.bean(actionPayloadClazz));
.map(
(MapFunction<Row, A>)
value ->
OBJECT_MAPPER.readValue(
value.<String>getAs("payload"), actionPayloadClazz),
Encoders.bean(actionPayloadClazz));
}
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(Dataset<G> rowDS,
private static <G extends Oaf, A extends Oaf> Dataset<G> promoteActionPayloadForGraphTable(
Dataset<G> rowDS,
Dataset<A> actionPayloadDS,
MergeAndGet.Strategy strategy,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
logger.info("Promoting action payload for graph table: payload={}, table={}", actionPayloadClazz.getSimpleName(), rowClazz.getSimpleName());
logger.info(
"Promoting action payload for graph table: payload={}, table={}",
actionPayloadClazz.getSimpleName(),
rowClazz.getSimpleName());
SerializableSupplier<Function<G, String>> rowIdFn = PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<Function<A, String>> actionPayloadIdFn = PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
SerializableSupplier<Function<G, String>> rowIdFn =
PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<Function<A, String>> actionPayloadIdFn =
PromoteActionPayloadForGraphTableJob::idFn;
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn =
MergeAndGet.functionFor(strategy);
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn =
MergeAndGet.functionFor(strategy);
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
SerializableSupplier<Function<G, Boolean>> isNotZeroFn =
PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(
Dataset<G> joinedAndMerged =
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS,
actionPayloadDS,
rowIdFn,
@ -168,14 +188,8 @@ public class PromoteActionPayloadForGraphTableJob {
rowClazz,
actionPayloadClazz);
return PromoteActionPayloadFunctions
.groupGraphTableByIdAndMerge(
joinedAndMerged,
rowIdFn,
mergeRowsAndGetFn,
zeroFn,
isNotZeroFn,
rowClazz);
return PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
}
private static <T extends Oaf> Function<T, String> idFn() {
@ -190,19 +204,49 @@ public class PromoteActionPayloadForGraphTableJob {
private static <T extends Oaf> String idFnForRelation(T t) {
Relation r = (Relation) t;
return Optional.ofNullable(r.getSource())
.map(source -> Optional.ofNullable(r.getTarget())
.map(target -> Optional.ofNullable(r.getRelType())
.map(relType -> Optional.ofNullable(r.getSubRelType())
.map(subRelType -> Optional.ofNullable(r.getRelClass())
.map(relClass -> String.join(source, target, relType, subRelType, relClass))
.orElse(String.join(source, target, relType, subRelType))
)
.orElse(String.join(source, target, relType))
)
.orElse(String.join(source, target))
)
.orElse(source)
)
.map(
source ->
Optional.ofNullable(r.getTarget())
.map(
target ->
Optional.ofNullable(r.getRelType())
.map(
relType ->
Optional.ofNullable(
r
.getSubRelType())
.map(
subRelType ->
Optional
.ofNullable(
r
.getRelClass())
.map(
relClass ->
String
.join(
source,
target,
relType,
subRelType,
relClass))
.orElse(
String
.join(
source,
target,
relType,
subRelType)))
.orElse(
String
.join(
source,
target,
relType)))
.orElse(
String.join(
source, target)))
.orElse(source))
.orElse(null);
}
@ -242,13 +286,8 @@ public class PromoteActionPayloadForGraphTableJob {
};
}
private static <G extends Oaf> void saveGraphTable(Dataset<G> result,
String path) {
private static <G extends Oaf> void saveGraphTable(Dataset<G> result, String path) {
logger.info("Saving graph table to path: {}", path);
result
.toJSON()
.write()
.option("compression", "gzip")
.text(path);
result.toJSON().write().option("compression", "gzip").text(path);
}
}

View File

@ -1,7 +1,13 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
@ -11,23 +17,14 @@ import org.apache.spark.sql.TypedColumn;
import org.apache.spark.sql.expressions.Aggregator;
import scala.Tuple2;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiFunction;
import java.util.function.Function;
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
/**
* Promote action payload functions.
*/
/** Promote action payload functions. */
public class PromoteActionPayloadFunctions {
private PromoteActionPayloadFunctions() {
}
private PromoteActionPayloadFunctions() {}
/**
* Joins dataset representing graph table with dataset representing action payload using supplied functions.
* Joins dataset representing graph table with dataset representing action payload using
* supplied functions.
*
* @param rowDS Dataset representing graph table
* @param actionPayloadDS Dataset representing action payload
@ -40,7 +37,8 @@ public class PromoteActionPayloadFunctions {
* @param <A> Type of action payload instance
* @return Dataset of merged graph table rows and action payload instances
*/
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(Dataset<G> rowDS,
public static <G extends Oaf, A extends Oaf> Dataset<G> joinGraphTableWithActionPayloadAndMerge(
Dataset<G> rowDS,
Dataset<A> actionPayloadDS,
SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<Function<A, String>> actionPayloadIdFn,
@ -48,34 +46,58 @@ public class PromoteActionPayloadFunctions {
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
if (!isSubClass(rowClazz, actionPayloadClazz)) {
throw new RuntimeException("action payload type must be the same or be a super type of table row type");
throw new RuntimeException(
"action payload type must be the same or be a super type of table row type");
}
Dataset<Tuple2<String, G>> rowWithIdDS = mapToTupleWithId(rowDS, rowIdFn, rowClazz);
Dataset<Tuple2<String, A>> actionPayloadWithIdDS = mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
Dataset<Tuple2<String, A>> actionPayloadWithIdDS =
mapToTupleWithId(actionPayloadDS, actionPayloadIdFn, actionPayloadClazz);
return rowWithIdDS
.joinWith(actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), "full_outer")
.map((MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>) value -> {
Optional<G> rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2);
Optional<A> actionPayloadOpt = Optional.ofNullable(value._2()).map(Tuple2::_2);
return rowOpt
.map(row -> actionPayloadOpt
.map(actionPayload -> mergeAndGetFn.get().apply(row, actionPayload))
.joinWith(
actionPayloadWithIdDS,
rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")),
"full_outer")
.map(
(MapFunction<Tuple2<Tuple2<String, G>, Tuple2<String, A>>, G>)
value -> {
Optional<G> rowOpt =
Optional.ofNullable(value._1()).map(Tuple2::_2);
Optional<A> actionPayloadOpt =
Optional.ofNullable(value._2()).map(Tuple2::_2);
return rowOpt.map(
row ->
actionPayloadOpt
.map(
actionPayload ->
mergeAndGetFn
.get()
.apply(
row,
actionPayload))
.orElse(row))
.orElseGet(() -> actionPayloadOpt
.filter(actionPayload -> actionPayload.getClass().equals(rowClazz))
.orElseGet(
() ->
actionPayloadOpt
.filter(
actionPayload ->
actionPayload
.getClass()
.equals(
rowClazz))
.map(rowClazz::cast)
.orElse(null));
}, Encoders.kryo(rowClazz))
},
Encoders.kryo(rowClazz))
.filter((FilterFunction<G>) Objects::nonNull);
}
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(Dataset<T> ds,
SerializableSupplier<Function<T, String>> idFn,
Class<T> clazz) {
return ds
.map((MapFunction<T, Tuple2<String, T>>) value -> new Tuple2<>(idFn.get().apply(value), value),
private static <T extends Oaf> Dataset<Tuple2<String, T>> mapToTupleWithId(
Dataset<T> ds, SerializableSupplier<Function<T, String>> idFn, Class<T> clazz) {
return ds.map(
(MapFunction<T, Tuple2<String, T>>)
value -> new Tuple2<>(idFn.get().apply(value), value),
Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz)));
}
@ -91,15 +113,17 @@ public class PromoteActionPayloadFunctions {
* @param <G> Type of graph table row
* @return Dataset of aggregated graph table rows
*/
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(Dataset<G> rowDS,
public static <G extends Oaf> Dataset<G> groupGraphTableByIdAndMerge(
Dataset<G> rowDS,
SerializableSupplier<Function<G, String>> rowIdFn,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<G> zeroFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
return rowDS
.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
TypedColumn<G, G> aggregator =
new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
return rowDS.groupByKey(
(MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
.agg(aggregator)
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
}
@ -115,7 +139,8 @@ public class PromoteActionPayloadFunctions {
private SerializableSupplier<Function<G, Boolean>> isNotZeroFn;
private Class<G> rowClazz;
public TableAggregator(SerializableSupplier<G> zeroFn,
public TableAggregator(
SerializableSupplier<G> zeroFn,
SerializableSupplier<BiFunction<G, G, G>> mergeAndGetFn,
SerializableSupplier<Function<G, Boolean>> isNotZeroFn,
Class<G> rowClazz) {
@ -149,7 +174,8 @@ public class PromoteActionPayloadFunctions {
} else if (!isNotZero.apply(left) && isNotZero.apply(right)) {
return right;
}
throw new RuntimeException("internal aggregation error: left and right objects are zero");
throw new RuntimeException(
"internal aggregation error: left and right objects are zero");
}
@Override

View File

@ -1,10 +1,20 @@
package eu.dnetlib.dhp.actionmanager.partition;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import static org.apache.spark.sql.functions.*;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.actionmanager.ISClient;
import eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest;
import eu.dnetlib.dhp.schema.oaf.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
@ -25,32 +35,23 @@ import org.mockito.junit.jupiter.MockitoExtension;
import scala.Tuple2;
import scala.collection.mutable.Seq;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import static eu.dnetlib.dhp.common.ThrowingSupport.rethrowAsRuntimeException;
import static org.apache.spark.sql.functions.*;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import static scala.collection.JavaConversions.mutableSeqAsJavaList;
@ExtendWith(MockitoExtension.class)
public class PartitionActionSetsByPayloadTypeJobTest {
private static final ClassLoader cl = PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
private static final ClassLoader cl =
PartitionActionSetsByPayloadTypeJobTest.class.getClassLoader();
private static Configuration configuration;
private static SparkSession spark;
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$.apply(
private static final StructType ATOMIC_ACTION_SCHEMA =
StructType$.MODULE$.apply(
Arrays.asList(
StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply("payload", DataTypes.StringType, false, Metadata.empty())
));
StructField$.MODULE$.apply(
"clazz", DataTypes.StringType, false, Metadata.empty()),
StructField$.MODULE$.apply(
"payload", DataTypes.StringType, false, Metadata.empty())));
@BeforeAll
public static void beforeAll() throws IOException {
@ -71,11 +72,11 @@ public class PartitionActionSetsByPayloadTypeJobTest {
@Nested
class Main {
@Mock
private ISClient isClient;
@Mock private ISClient isClient;
@Test
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir)
throws Exception {
// given
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
Path outputDir = workingDir.resolve("output");
@ -85,15 +86,16 @@ public class PartitionActionSetsByPayloadTypeJobTest {
List<String> inputActionSetsPaths = resolveInputActionSetPaths(inputActionSetsBaseDir);
// when
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString())).thenReturn(inputActionSetsPaths);
Mockito.when(isClient.getLatestRawsetPaths(Mockito.anyString()))
.thenReturn(inputActionSetsPaths);
PartitionActionSetsByPayloadTypeJob job = new PartitionActionSetsByPayloadTypeJob();
job.setIsClient(isClient);
job.run(
Boolean.FALSE,
"", // it can be empty we're mocking the response from isClient to resolve the paths
outputDir.toString()
);
"", // it can be empty we're mocking the response from isClient to resolve the
// paths
outputDir.toString());
// then
Files.exists(outputDir);
@ -110,45 +112,61 @@ public class PartitionActionSetsByPayloadTypeJobTest {
}
}
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir) throws IOException {
private List<String> resolveInputActionSetPaths(Path inputActionSetsBaseDir)
throws IOException {
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
return Files
.list(inputActionSetJsonDumpsDir)
.map(path -> {
return Files.list(inputActionSetJsonDumpsDir)
.map(
path -> {
String inputActionSetId = path.getFileName().toString();
return inputActionSetsBaseDir.resolve(inputActionSetId).toString();
})
.collect(Collectors.toCollection(ArrayList::new));
}
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir) throws IOException {
private static Map<String, List<String>> createActionSets(Path inputActionSetsDir)
throws IOException {
Path inputActionSetJsonDumpsDir = getInputActionSetJsonDumpsDir();
Map<String, List<String>> oafsByType = new HashMap<>();
Files
.list(inputActionSetJsonDumpsDir)
.forEach(inputActionSetJsonDumpFile -> {
String inputActionSetId = inputActionSetJsonDumpFile.getFileName().toString();
Files.list(inputActionSetJsonDumpsDir)
.forEach(
inputActionSetJsonDumpFile -> {
String inputActionSetId =
inputActionSetJsonDumpFile.getFileName().toString();
Path inputActionSetDir = inputActionSetsDir.resolve(inputActionSetId);
Dataset<String> actionDS = readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
Dataset<String> actionDS =
readActionsFromJsonDump(inputActionSetJsonDumpFile.toString())
.cache();
writeActionsAsJobInput(actionDS, inputActionSetId, inputActionSetDir.toString());
writeActionsAsJobInput(
actionDS, inputActionSetId, inputActionSetDir.toString());
Map<String, List<String>> actionSetOafsByType = actionDS
.withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.select(expr("atomic_action.*"))
.groupBy(col("clazz"))
Map<String, List<String>> actionSetOafsByType =
actionDS
.withColumn(
"atomic_action",
from_json(col("value"), ATOMIC_ACTION_SCHEMA))
.select(expr("atomic_action.*")).groupBy(col("clazz"))
.agg(collect_list(col("payload")).as("payload_list"))
.collectAsList()
.stream()
.map(row -> new AbstractMap.SimpleEntry<>(row.<String>getAs("clazz"),
mutableSeqAsJavaList(row.<Seq<String>>getAs("payload_list"))))
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue));
.collectAsList().stream()
.map(
row ->
new AbstractMap.SimpleEntry<>(
row.<String>getAs("clazz"),
mutableSeqAsJavaList(
row.<Seq<String>>getAs(
"payload_list"))))
.collect(
Collectors.toMap(
AbstractMap.SimpleEntry::getKey,
AbstractMap.SimpleEntry::getValue));
actionSetOafsByType.keySet()
.forEach(x -> {
actionSetOafsByType
.keySet()
.forEach(
x -> {
if (oafsByType.containsKey(x)) {
List<String> collected = new ArrayList<>();
collected.addAll(oafsByType.get(x));
@ -164,38 +182,40 @@ public class PartitionActionSetsByPayloadTypeJobTest {
}
private static Path getInputActionSetJsonDumpsDir() {
return Paths
.get(Objects.requireNonNull(cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
return Paths.get(
Objects.requireNonNull(
cl.getResource("eu/dnetlib/dhp/actionmanager/partition/input/"))
.getFile());
}
private static Dataset<String> readActionsFromJsonDump(String path) {
return spark
.read()
.textFile(path);
return spark.read().textFile(path);
}
private static void writeActionsAsJobInput(Dataset<String> actionDS,
String inputActionSetId,
String path) {
actionDS
.javaRDD()
private static void writeActionsAsJobInput(
Dataset<String> actionDS, String inputActionSetId, String path) {
actionDS.javaRDD()
.mapToPair(json -> new Tuple2<>(new Text(inputActionSetId), new Text(json)))
.saveAsNewAPIHadoopFile(path,
.saveAsNewAPIHadoopFile(
path,
Text.class,
Text.class,
SequenceFileOutputFormat.class,
configuration);
}
private static <T extends Oaf> void assertForOafType(Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
Path outputDatasetDir = outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
private static <T extends Oaf> void assertForOafType(
Path outputDir, Map<String, List<String>> oafsByClassName, Class<T> clazz) {
Path outputDatasetDir =
outputDir.resolve(String.format("clazz=%s", clazz.getCanonicalName()));
Files.exists(outputDatasetDir);
List<T> actuals = readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
List<T> actuals =
readActionPayloadFromJobOutput(outputDatasetDir.toString(), clazz).collectAsList();
actuals.sort(Comparator.comparingInt(Object::hashCode));
List<T> expecteds = oafsByClassName.get(clazz.getCanonicalName()).stream()
List<T> expecteds =
oafsByClassName.get(clazz.getCanonicalName()).stream()
.map(json -> mapToOaf(json, clazz))
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
@ -203,19 +223,23 @@ public class PartitionActionSetsByPayloadTypeJobTest {
assertIterableEquals(expecteds, actuals);
}
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(String path,
Class<T> clazz) {
return spark
.read()
private static <T extends Oaf> Dataset<T> readActionPayloadFromJobOutput(
String path, Class<T> clazz) {
return spark.read()
.parquet(path)
.map((MapFunction<Row, T>) value -> OBJECT_MAPPER.readValue(value.<String>getAs("payload"), clazz),
.map(
(MapFunction<Row, T>)
value ->
OBJECT_MAPPER.readValue(
value.<String>getAs("payload"), clazz),
Encoders.bean(clazz));
}
private static <T extends Oaf> T mapToOaf(String json, Class<T> clazz) {
return rethrowAsRuntimeException(
() -> OBJECT_MAPPER.readValue(json, clazz),
String.format("failed to map json to class: json=%s, class=%s", json, clazz.getCanonicalName())
);
String.format(
"failed to map json to class: json=%s, class=%s",
json, clazz.getCanonicalName()));
}
}

View File

@ -1,17 +1,16 @@
package eu.dnetlib.dhp.actionmanager.promote;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.function.BiFunction;
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.Strategy;
import static eu.dnetlib.dhp.actionmanager.promote.MergeAndGet.functionFor;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.*;
import java.util.function.BiFunction;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
public class MergeAndGetTest {
@Nested
@ -24,11 +23,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -38,11 +37,11 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -52,11 +51,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -66,11 +65,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -80,11 +79,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -94,7 +93,8 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
Oaf x = fn.get().apply(a, b);
@ -110,11 +110,11 @@ public class MergeAndGetTest {
Oaf b = mock(Oaf.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -124,30 +124,28 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given
class OafEntitySub1 extends OafEntity {
}
class OafEntitySub2 extends OafEntity {
}
class OafEntitySub1 extends OafEntity {}
class OafEntitySub2 extends OafEntity {}
OafEntitySub1 a = mock(OafEntitySub1.class);
OafEntitySub2 b = mock(OafEntitySub2.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -157,7 +155,8 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.MERGE_FROM_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.MERGE_FROM_AND_GET);
// then
Oaf x = fn.get().apply(a, b);
@ -177,11 +176,11 @@ public class MergeAndGetTest {
Relation b = mock(Relation.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -191,11 +190,11 @@ public class MergeAndGetTest {
OafEntity b = mock(OafEntity.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
@ -205,28 +204,29 @@ public class MergeAndGetTest {
Result b = mock(Result.class);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
// given
// real types must be used because subclass-superclass resolution does not work for mocks
// real types must be used because subclass-superclass resolution does not work for
// mocks
Dataset a = new Dataset();
a.setLastupdatetimestamp(1L);
Result b = new Result();
b.setLastupdatetimestamp(2L);
// when
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn = functionFor(Strategy.SELECT_NEWER_AND_GET);
SerializableSupplier<BiFunction<Oaf, Oaf, Oaf>> fn =
functionFor(Strategy.SELECT_NEWER_AND_GET);
// then
assertThrows(RuntimeException.class, () ->
fn.get().apply(a, b));
assertThrows(RuntimeException.class, () -> fn.get().apply(a, b));
}
@Test

View File

@ -1,8 +1,20 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;
@ -14,21 +26,9 @@ import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.params.provider.Arguments.arguments;
public class PromoteActionPayloadForGraphTableJobTest {
private static final ClassLoader cl = PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
private static final ClassLoader cl =
PromoteActionPayloadForGraphTableJobTest.class.getClassLoader();
private static SparkSession spark;
@ -52,7 +52,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
@BeforeEach
public void beforeEach() throws IOException {
workingDir = Files.createTempDirectory(PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
workingDir =
Files.createTempDirectory(
PromoteActionPayloadForGraphTableJobTest.class.getSimpleName());
inputDir = workingDir.resolve("input");
inputGraphRootDir = inputDir.resolve("graph");
inputActionPayloadRootDir = inputDir.resolve("action_payload");
@ -80,35 +82,50 @@ public class PromoteActionPayloadForGraphTableJobTest {
Class<OafEntity> actionPayloadClazz = OafEntity.class;
// when
RuntimeException exception = assertThrows(RuntimeException.class, () ->
PromoteActionPayloadForGraphTableJob.main(new String[]{
RuntimeException exception =
assertThrows(
RuntimeException.class,
() ->
PromoteActionPayloadForGraphTableJob.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-inputGraphTablePath", "",
"-graphTableClassName", rowClazz.getCanonicalName(),
"-inputActionPayloadPath", "",
"-actionPayloadClassName", actionPayloadClazz.getCanonicalName(),
"-actionPayloadClassName",
actionPayloadClazz.getCanonicalName(),
"-outputGraphTablePath", "",
"-mergeAndGetStrategy", MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name()
"-mergeAndGetStrategy",
MergeAndGet.Strategy.SELECT_NEWER_AND_GET
.name()
}));
// then
String msg = String.format("graph table class is not a subclass of action payload class: graph=%s, action=%s",
String msg =
String.format(
"graph table class is not a subclass of action payload class: graph=%s, action=%s",
rowClazz.getCanonicalName(), actionPayloadClazz.getCanonicalName());
assertTrue(exception.getMessage().contains(msg));
}
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable(MergeAndGet.Strategy strategy,
@MethodSource(
"eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable(
MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) throws Exception {
Class<? extends Oaf> actionPayloadClazz)
throws Exception {
// given
Path inputGraphTableDir = createGraphTable(inputGraphRootDir, rowClazz);
Path inputActionPayloadDir = createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
Path outputGraphTableDir = outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
Path inputActionPayloadDir =
createActionPayload(inputActionPayloadRootDir, rowClazz, actionPayloadClazz);
Path outputGraphTableDir =
outputDir.resolve("graph").resolve(rowClazz.getSimpleName().toLowerCase());
// when
PromoteActionPayloadForGraphTableJob.main(new String[]{
PromoteActionPayloadForGraphTableJob.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-inputGraphTablePath", inputGraphTableDir.toString(),
"-graphTableClassName", rowClazz.getCanonicalName(),
@ -121,17 +138,22 @@ public class PromoteActionPayloadForGraphTableJobTest {
// then
assertTrue(Files.exists(outputGraphTableDir));
List<? extends Oaf> actualOutputRows = readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
.collectAsList()
.stream()
List<? extends Oaf> actualOutputRows =
readGraphTableFromJobOutput(outputGraphTableDir.toString(), rowClazz)
.collectAsList().stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
String expectedOutputGraphTableJsonDumpPath = resultFileLocation(strategy, rowClazz, actionPayloadClazz);
Path expectedOutputGraphTableJsonDumpFile = Paths
.get(Objects.requireNonNull(cl.getResource(expectedOutputGraphTableJsonDumpPath)).getFile());
List<? extends Oaf> expectedOutputRows = readGraphTableFromJsonDump(expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
.collectAsList()
.stream()
String expectedOutputGraphTableJsonDumpPath =
resultFileLocation(strategy, rowClazz, actionPayloadClazz);
Path expectedOutputGraphTableJsonDumpFile =
Paths.get(
Objects.requireNonNull(
cl.getResource(expectedOutputGraphTableJsonDumpPath))
.getFile());
List<? extends Oaf> expectedOutputRows =
readGraphTableFromJsonDump(
expectedOutputGraphTableJsonDumpFile.toString(), rowClazz)
.collectAsList().stream()
.sorted(Comparator.comparingInt(Object::hashCode))
.collect(Collectors.toList());
assertIterableEquals(expectedOutputRows, actualOutputRows);
@ -140,27 +162,50 @@ public class PromoteActionPayloadForGraphTableJobTest {
public static Stream<Arguments> promoteJobTestParams() {
return Stream.of(
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Dataset.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, eu.dnetlib.dhp.schema.oaf.Dataset.class, eu.dnetlib.dhp.schema.oaf.Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Datasource.class, Datasource.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Organization.class, Organization.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, OtherResearchProduct.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, OtherResearchProduct.class, Result.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
eu.dnetlib.dhp.schema.oaf.Dataset.class,
eu.dnetlib.dhp.schema.oaf.Dataset.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
eu.dnetlib.dhp.schema.oaf.Dataset.class,
eu.dnetlib.dhp.schema.oaf.Result.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Datasource.class,
Datasource.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Organization.class,
Organization.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
OtherResearchProduct.class,
OtherResearchProduct.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
OtherResearchProduct.class,
Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Project.class, Project.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Publication.class),
arguments(
MergeAndGet.Strategy.MERGE_FROM_AND_GET,
Publication.class,
Publication.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Publication.class, Result.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Relation.class, Relation.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Software.class),
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class)
);
arguments(MergeAndGet.Strategy.MERGE_FROM_AND_GET, Software.class, Result.class));
}
private static <G extends Oaf> Path createGraphTable(Path inputGraphRootDir,
Class<G> rowClazz) {
private static <G extends Oaf> Path createGraphTable(
Path inputGraphRootDir, Class<G> rowClazz) {
String inputGraphTableJsonDumpPath = inputGraphTableJsonDumpLocation(rowClazz);
Path inputGraphTableJsonDumpFile = Paths
.get(Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath)).getFile());
Dataset<G> rowDS = readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
Path inputGraphTableJsonDumpFile =
Paths.get(
Objects.requireNonNull(cl.getResource(inputGraphTableJsonDumpPath))
.getFile());
Dataset<G> rowDS =
readGraphTableFromJsonDump(inputGraphTableJsonDumpFile.toString(), rowClazz);
String inputGraphTableName = rowClazz.getSimpleName().toLowerCase();
Path inputGraphTableDir = inputGraphRootDir.resolve(inputGraphTableName);
writeGraphTableAaJobInput(rowDS, inputGraphTableDir.toString());
@ -169,71 +214,74 @@ public class PromoteActionPayloadForGraphTableJobTest {
private static String inputGraphTableJsonDumpLocation(Class<? extends Oaf> rowClazz) {
return String.format(
"%s/%s.json", "eu/dnetlib/dhp/actionmanager/promote/input/graph", rowClazz.getSimpleName().toLowerCase());
"%s/%s.json",
"eu/dnetlib/dhp/actionmanager/promote/input/graph",
rowClazz.getSimpleName().toLowerCase());
}
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(String path,
Class<G> rowClazz) {
return spark
.read()
private static <G extends Oaf> Dataset<G> readGraphTableFromJsonDump(
String path, Class<G> rowClazz) {
return spark.read()
.textFile(path)
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz));
.map(
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
Encoders.bean(rowClazz));
}
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS,
String path) {
rowDS
.write()
.option("compression", "gzip")
.json(path);
private static <G extends Oaf> void writeGraphTableAaJobInput(Dataset<G> rowDS, String path) {
rowDS.write().option("compression", "gzip").json(path);
}
private static <G extends Oaf, A extends Oaf> Path createActionPayload(Path inputActionPayloadRootDir,
Class<G> rowClazz,
Class<A> actionPayloadClazz) {
String inputActionPayloadJsonDumpPath = inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
Path inputActionPayloadJsonDumpFile = Paths
.get(Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath)).getFile());
Dataset<String> actionPayloadDS = readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
Path inputActionPayloadDir = inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
private static <G extends Oaf, A extends Oaf> Path createActionPayload(
Path inputActionPayloadRootDir, Class<G> rowClazz, Class<A> actionPayloadClazz) {
String inputActionPayloadJsonDumpPath =
inputActionPayloadJsonDumpLocation(rowClazz, actionPayloadClazz);
Path inputActionPayloadJsonDumpFile =
Paths.get(
Objects.requireNonNull(cl.getResource(inputActionPayloadJsonDumpPath))
.getFile());
Dataset<String> actionPayloadDS =
readActionPayloadFromJsonDump(inputActionPayloadJsonDumpFile.toString());
Path inputActionPayloadDir =
inputActionPayloadRootDir.resolve(actionPayloadClazz.getSimpleName().toLowerCase());
writeActionPayloadAsJobInput(actionPayloadDS, inputActionPayloadDir.toString());
return inputActionPayloadDir;
}
private static String inputActionPayloadJsonDumpLocation(Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
private static String inputActionPayloadJsonDumpLocation(
Class<? extends Oaf> rowClazz, Class<? extends Oaf> actionPayloadClazz) {
return String.format("eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase());
return String.format(
"eu/dnetlib/dhp/actionmanager/promote/input/action_payload/%s_table/%s.json",
rowClazz.getSimpleName().toLowerCase(),
actionPayloadClazz.getSimpleName().toLowerCase());
}
private static Dataset<String> readActionPayloadFromJsonDump(String path) {
return spark
.read()
.textFile(path);
return spark.read().textFile(path);
}
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS,
String path) {
actionPayloadDS
.withColumnRenamed("value", "payload")
.write()
.parquet(path);
private static void writeActionPayloadAsJobInput(Dataset<String> actionPayloadDS, String path) {
actionPayloadDS.withColumnRenamed("value", "payload").write().parquet(path);
}
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(String path,
Class<G> rowClazz) {
return spark
.read()
private static <G extends Oaf> Dataset<G> readGraphTableFromJobOutput(
String path, Class<G> rowClazz) {
return spark.read()
.textFile(path)
.map((MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz), Encoders.bean(rowClazz));
.map(
(MapFunction<String, G>) json -> OBJECT_MAPPER.readValue(json, rowClazz),
Encoders.bean(rowClazz));
}
private static String resultFileLocation(MergeAndGet.Strategy strategy,
private static String resultFileLocation(
MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) {
return String
.format("eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
strategy.name().toLowerCase(), rowClazz.getSimpleName().toLowerCase(), actionPayloadClazz.getSimpleName().toLowerCase());
return String.format(
"eu/dnetlib/dhp/actionmanager/promote/output/graph/%s/%s/%s_action_payload/result.json",
strategy.name().toLowerCase(),
rowClazz.getSimpleName().toLowerCase(),
actionPayloadClazz.getSimpleName().toLowerCase());
}
}

View File

@ -1,7 +1,15 @@
package eu.dnetlib.dhp.actionmanager.promote;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import eu.dnetlib.dhp.common.FunctionalInterfaceSupport.SerializableSupplier;
import eu.dnetlib.dhp.schema.oaf.Oaf;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@ -11,15 +19,6 @@ import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Nested;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Function;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
public class PromoteActionPayloadFunctionsTest {
private static SparkSession spark;
@ -44,20 +43,20 @@ public class PromoteActionPayloadFunctionsTest {
@Test
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
// given
class OafImpl extends Oaf {
}
class OafImpl extends Oaf {}
// when
assertThrows(RuntimeException.class, () ->
PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(null,
assertThrows(
RuntimeException.class,
() ->
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
null,
null,
null,
null,
null,
OafImplSubSub.class,
OafImpl.class
));
OafImpl.class));
}
@Test
@ -68,39 +67,52 @@ public class PromoteActionPayloadFunctionsTest {
String id2 = "id2";
String id3 = "id3";
String id4 = "id4";
List<OafImplSubSub> rowData = Arrays.asList(
List<OafImplSubSub> rowData =
Arrays.asList(
createOafImplSubSub(id0),
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id3)
);
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSubSub> actionPayloadData = Arrays.asList(
List<OafImplSubSub> actionPayloadData =
Arrays.asList(
createOafImplSubSub(id1),
createOafImplSubSub(id2), createOafImplSubSub(id2),
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3),
createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4), createOafImplSubSub(id4)
);
Dataset<OafImplSubSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
createOafImplSubSub(id2),
createOafImplSubSub(id2),
createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id4),
createOafImplSubSub(id4),
createOafImplSubSub(id4),
createOafImplSubSub(id4));
Dataset<OafImplSubSub> actionPayloadDS =
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSubSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSubSub, String>> actionPayloadIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
// when
List<OafImplSubSub> results = PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(rowDS,
List<OafImplSubSub> results =
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS,
actionPayloadDS,
rowIdFn,
actionPayloadIdFn,
mergeAndGetFn,
OafImplSubSub.class,
OafImplSubSub.class
)
OafImplSubSub.class)
.collectAsList();
// then
@ -111,7 +123,8 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
assertEquals(4, results.stream().filter(x -> x.getId().equals(id4)).count());
results.forEach(result -> {
results.forEach(
result -> {
switch (result.getId()) {
case "id0":
assertEquals(1, result.getMerged());
@ -138,39 +151,52 @@ public class PromoteActionPayloadFunctionsTest {
String id2 = "id2";
String id3 = "id3";
String id4 = "id4";
List<OafImplSubSub> rowData = Arrays.asList(
List<OafImplSubSub> rowData =
Arrays.asList(
createOafImplSubSub(id0),
createOafImplSubSub(id1),
createOafImplSubSub(id2),
createOafImplSubSub(id3)
);
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
List<OafImplSub> actionPayloadData = Arrays.asList(
List<OafImplSub> actionPayloadData =
Arrays.asList(
createOafImplSub(id1),
createOafImplSub(id2), createOafImplSub(id2),
createOafImplSub(id3), createOafImplSub(id3), createOafImplSub(id3),
createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4), createOafImplSub(id4)
);
Dataset<OafImplSub> actionPayloadDS = spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
createOafImplSub(id2),
createOafImplSub(id2),
createOafImplSub(id3),
createOafImplSub(id3),
createOafImplSub(id3),
createOafImplSub(id4),
createOafImplSub(id4),
createOafImplSub(id4),
createOafImplSub(id4));
Dataset<OafImplSub> actionPayloadDS =
spark.createDataset(actionPayloadData, Encoders.bean(OafImplSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn = () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<Function<OafImplSub, String>> actionPayloadIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSub, OafImplSubSub>>
mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
// when
List<OafImplSubSub> results = PromoteActionPayloadFunctions
.joinGraphTableWithActionPayloadAndMerge(rowDS,
List<OafImplSubSub> results =
PromoteActionPayloadFunctions.joinGraphTableWithActionPayloadAndMerge(
rowDS,
actionPayloadDS,
rowIdFn,
actionPayloadIdFn,
mergeAndGetFn,
OafImplSubSub.class,
OafImplSub.class
)
OafImplSub.class)
.collectAsList();
// then
@ -181,7 +207,8 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(3, results.stream().filter(x -> x.getId().equals(id3)).count());
assertEquals(0, results.stream().filter(x -> x.getId().equals(id4)).count());
results.forEach(result -> {
results.forEach(
result -> {
switch (result.getId()) {
case "id0":
assertEquals(1, result.getMerged());
@ -196,7 +223,6 @@ public class PromoteActionPayloadFunctionsTest {
}
});
}
}
@Nested
@ -208,24 +234,34 @@ public class PromoteActionPayloadFunctionsTest {
String id1 = "id1";
String id2 = "id2";
String id3 = "id3";
List<OafImplSubSub> rowData = Arrays.asList(
List<OafImplSubSub> rowData =
Arrays.asList(
createOafImplSubSub(id1),
createOafImplSubSub(id2), createOafImplSubSub(id2),
createOafImplSubSub(id3), createOafImplSubSub(id3), createOafImplSubSub(id3)
);
Dataset<OafImplSubSub> rowDS = spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
createOafImplSubSub(id2),
createOafImplSubSub(id2),
createOafImplSubSub(id3),
createOafImplSubSub(id3),
createOafImplSubSub(id3));
Dataset<OafImplSubSub> rowDS =
spark.createDataset(rowData, Encoders.bean(OafImplSubSub.class));
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn = () -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>> mergeAndGetFn = () -> (x, y) -> {
SerializableSupplier<Function<OafImplSubSub, String>> rowIdFn =
() -> OafImplRoot::getId;
SerializableSupplier<BiFunction<OafImplSubSub, OafImplSubSub, OafImplSubSub>>
mergeAndGetFn =
() ->
(x, y) -> {
x.merge(y);
return x;
};
SerializableSupplier<OafImplSubSub> zeroFn = OafImplSubSub::new;
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn = () -> x -> Objects.nonNull(x.getId());
SerializableSupplier<Function<OafImplSubSub, Boolean>> isNotZeroFn =
() -> x -> Objects.nonNull(x.getId());
// when
List<OafImplSubSub> results = PromoteActionPayloadFunctions
.groupGraphTableByIdAndMerge(rowDS,
List<OafImplSubSub> results =
PromoteActionPayloadFunctions.groupGraphTableByIdAndMerge(
rowDS,
rowIdFn,
mergeAndGetFn,
zeroFn,
@ -239,7 +275,8 @@ public class PromoteActionPayloadFunctionsTest {
assertEquals(1, results.stream().filter(x -> x.getId().equals(id2)).count());
assertEquals(1, results.stream().filter(x -> x.getId().equals(id3)).count());
results.forEach(result -> {
results.forEach(
result -> {
switch (result.getId()) {
case "id1":
assertEquals(1, result.getMerged());
@ -255,7 +292,6 @@ public class PromoteActionPayloadFunctionsTest {
}
});
}
}
public static class OafImplRoot extends Oaf {
@ -310,5 +346,4 @@ public class PromoteActionPayloadFunctionsTest {
x.setId(id);
return x;
}
}

View File

@ -7,6 +7,11 @@ import eu.dnetlib.dhp.model.mdstore.Provenance;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.cli.*;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -24,49 +29,52 @@ import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
public class GenerateNativeStoreSparkJob {
public static MetadataRecord parseRecord(
final String input,
final String xpath,
final String encoding,
final Provenance provenance,
final Long dateOfCollection,
final LongAccumulator totalItems,
final LongAccumulator invalidRecords) {
public static MetadataRecord parseRecord (final String input, final String xpath, final String encoding, final Provenance provenance, final Long dateOfCollection, final LongAccumulator totalItems, final LongAccumulator invalidRecords) {
if(totalItems != null)
totalItems.add(1);
if (totalItems != null) totalItems.add(1);
try {
SAXReader reader = new SAXReader();
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Document document =
reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Node node = document.selectSingleNode(xpath);
final String originalIdentifier = node.getText();
if (StringUtils.isBlank(originalIdentifier)) {
if (invalidRecords!= null)
invalidRecords.add(1);
if (invalidRecords != null) invalidRecords.add(1);
return null;
}
return new MetadataRecord(originalIdentifier, encoding, provenance, input, dateOfCollection);
return new MetadataRecord(
originalIdentifier, encoding, provenance, input, dateOfCollection);
} catch (Throwable e) {
if (invalidRecords!= null)
invalidRecords.add(1);
if (invalidRecords != null) invalidRecords.add(1);
e.printStackTrace();
return null;
}
}
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(GenerateNativeStoreSparkJob.class.getResourceAsStream("/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
GenerateNativeStoreSparkJob.class.getResourceAsStream(
"/eu/dnetlib/dhp/collection/collection_input_parameters.json")));
parser.parseArgument(args);
final ObjectMapper jsonMapper = new ObjectMapper();
final Provenance provenance = jsonMapper.readValue(parser.get("provenance"), Provenance.class);
final Provenance provenance =
jsonMapper.readValue(parser.get("provenance"), Provenance.class);
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
final SparkSession spark = SparkSession
.builder()
final SparkSession spark =
SparkSession.builder()
.appName("GenerateNativeStoreSparkJob")
.master(parser.get("master"))
.getOrCreate();
@ -74,44 +82,82 @@ public class GenerateNativeStoreSparkJob {
final Map<String, String> ongoingMap = new HashMap<>();
final Map<String, String> reportMap = new HashMap<>();
final boolean test = parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
final boolean test =
parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
final JavaPairRDD<IntWritable, Text> inputRDD = sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
final JavaPairRDD<IntWritable, Text> inputRDD =
sc.sequenceFile(parser.get("input"), IntWritable.class, Text.class);
final LongAccumulator totalItems = sc.sc().longAccumulator("TotalItems");
final LongAccumulator invalidRecords = sc.sc().longAccumulator("InvalidRecords");
final MessageManager manager = new MessageManager(parser.get("rabbitHost"), parser.get("rabbitUser"), parser.get("rabbitPassword"), false, false, null);
final MessageManager manager =
new MessageManager(
parser.get("rabbitHost"),
parser.get("rabbitUser"),
parser.get("rabbitPassword"),
false,
false,
null);
final JavaRDD<MetadataRecord> mappeRDD = inputRDD.map(item -> parseRecord(item._2().toString(), parser.get("xpath"), parser.get("encoding"), provenance, dateOfCollection, totalItems, invalidRecords))
.filter(Objects::nonNull).distinct();
final JavaRDD<MetadataRecord> mappeRDD =
inputRDD.map(
item ->
parseRecord(
item._2().toString(),
parser.get("xpath"),
parser.get("encoding"),
provenance,
dateOfCollection,
totalItems,
invalidRecords))
.filter(Objects::nonNull)
.distinct();
ongoingMap.put("ongoing", "0");
if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"),"DataFrameCreation", MessageType.ONGOING, ongoingMap ), parser.get("rabbitOngoingQueue"), true, false);
manager.sendMessage(
new Message(
parser.get("workflowId"),
"DataFrameCreation",
MessageType.ONGOING,
ongoingMap),
parser.get("rabbitOngoingQueue"),
true,
false);
}
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mdstore = spark.createDataset(mappeRDD.rdd(), encoder);
final LongAccumulator mdStoreRecords = sc.sc().longAccumulator("MDStoreRecords");
mdStoreRecords.add(mdstore.count());
ongoingMap.put("ongoing", ""+ totalItems.value());
ongoingMap.put("ongoing", "" + totalItems.value());
if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"), "DataFrameCreation", MessageType.ONGOING, ongoingMap), parser.get("rabbitOngoingQueue"), true, false);
manager.sendMessage(
new Message(
parser.get("workflowId"),
"DataFrameCreation",
MessageType.ONGOING,
ongoingMap),
parser.get("rabbitOngoingQueue"),
true,
false);
}
mdstore.write().format("parquet").save(parser.get("output"));
reportMap.put("inputItem" , ""+ totalItems.value());
reportMap.put("inputItem", "" + totalItems.value());
reportMap.put("invalidRecords", "" + invalidRecords.value());
reportMap.put("mdStoreSize", "" + mdStoreRecords.value());
if (!test) {
manager.sendMessage(new Message(parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), parser.get("rabbitReportQueue"), true, false);
manager.sendMessage(
new Message(
parser.get("workflowId"), "Collection", MessageType.REPORT, reportMap),
parser.get("rabbitReportQueue"),
true,
false);
manager.close();
}
}
}

View File

@ -2,7 +2,6 @@ package eu.dnetlib.dhp.collection.plugin;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.stream.Stream;
public interface CollectorPlugin {

View File

@ -1,5 +1,11 @@
package eu.dnetlib.dhp.collection.plugin.oai;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@ -7,15 +13,6 @@ import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
public class OaiCollectorPlugin implements CollectorPlugin {
@ -24,7 +21,6 @@ public class OaiCollectorPlugin implements CollectorPlugin {
private static final Object OAI_FROM_DATE_PARAM = "fromDate";
private static final Object OAI_UNTIL_DATE_PARAM = "untilDate";
private OaiIteratorFactory oaiIteratorFactory;
@Override
@ -37,30 +33,48 @@ public class OaiCollectorPlugin implements CollectorPlugin {
final List<String> sets = new ArrayList<>();
if (setParam != null) {
sets.addAll(Lists.newArrayList(Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
sets.addAll(
Lists.newArrayList(
Splitter.on(",").omitEmptyStrings().trimResults().split(setParam)));
}
if (sets.isEmpty()) {
// If no set is defined, ALL the sets must be harvested
sets.add("");
}
if (baseUrl == null || baseUrl.isEmpty()) { throw new DnetCollectorException("Param 'baseurl' is null or empty"); }
if (baseUrl == null || baseUrl.isEmpty()) {
throw new DnetCollectorException("Param 'baseurl' is null or empty");
}
if (mdFormat == null || mdFormat.isEmpty()) { throw new DnetCollectorException("Param 'mdFormat' is null or empty"); }
if (mdFormat == null || mdFormat.isEmpty()) {
throw new DnetCollectorException("Param 'mdFormat' is null or empty");
}
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate); }
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
}
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate); }
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
throw new DnetCollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
}
final Iterator<Iterator<String>> iters = sets.stream()
.map(set -> getOaiIteratorFactory().newIterator(baseUrl, mdFormat, set, fromDate, untilDate))
final Iterator<Iterator<String>> iters =
sets.stream()
.map(
set ->
getOaiIteratorFactory()
.newIterator(
baseUrl, mdFormat, set, fromDate,
untilDate))
.iterator();
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED), false);
return StreamSupport.stream(
Spliterators.spliteratorUnknownSize(Iterators.concat(iters), Spliterator.ORDERED),
false);
}
public OaiIteratorFactory getOaiIteratorFactory() {
if (oaiIteratorFactory == null){
if (oaiIteratorFactory == null) {
oaiIteratorFactory = new OaiIteratorFactory();
}
return oaiIteratorFactory;

View File

@ -1,15 +1,14 @@
package eu.dnetlib.dhp.collection.plugin.oai;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.Queue;
import java.util.concurrent.PriorityBlockingQueue;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import eu.dnetlib.dhp.collection.worker.utils.XmlCleaner;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -18,10 +17,10 @@ import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
public class OaiIterator implements Iterator<String> {
private static final Log log = LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
private static final Log log =
LogFactory.getLog(OaiIterator.class); // NOPMD by marko on 11/24/08 5:02 PM
private final Queue<String> queue = new PriorityBlockingQueue<>();
private final SAXReader reader = new SAXReader();
@ -35,7 +34,12 @@ public class OaiIterator implements Iterator<String> {
private boolean started;
private final HttpConnector httpConnector;
public OaiIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate,
public OaiIterator(
final String baseUrl,
final String mdFormat,
final String set,
final String fromDate,
final String untilDate,
final HttpConnector httpConnector) {
this.baseUrl = baseUrl;
this.mdFormat = mdFormat;
@ -86,7 +90,10 @@ public class OaiIterator implements Iterator<String> {
private String firstPage() throws DnetCollectorException {
try {
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8");
String url =
baseUrl
+ "?verb=ListRecords&metadataPrefix="
+ URLEncoder.encode(mdFormat, "UTF-8");
if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8");
}
@ -107,17 +114,23 @@ public class OaiIterator implements Iterator<String> {
private String extractResumptionToken(final String xml) {
final String s = StringUtils.substringAfter(xml, "<resumptionToken");
if (s == null) { return null; }
if (s == null) {
return null;
}
final String result = StringUtils.substringBetween(s, ">", "</");
if (result == null) { return null; }
if (result == null) {
return null;
}
return result.trim();
}
private String otherPages(final String resumptionToken) throws DnetCollectorException {
try {
return downloadPage(baseUrl + "?verb=ListRecords&resumptionToken=" + URLEncoder.encode(resumptionToken, "UTF-8"));
return downloadPage(
baseUrl
+ "?verb=ListRecords&resumptionToken="
+ URLEncoder.encode(resumptionToken, "UTF-8"));
} catch (final UnsupportedEncodingException e) {
throw new DnetCollectorException(e);
}
@ -136,12 +149,16 @@ public class OaiIterator implements Iterator<String> {
doc = reader.read(new StringReader(cleaned));
} catch (final DocumentException e1) {
final String resumptionToken = extractResumptionToken(xml);
if (resumptionToken == null) { throw new DnetCollectorException("Error parsing cleaned document:" + cleaned, e1); }
if (resumptionToken == null) {
throw new DnetCollectorException(
"Error parsing cleaned document:" + cleaned, e1);
}
return resumptionToken;
}
}
final Node errorNode = doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
final Node errorNode =
doc.selectSingleNode("/*[local-name()='OAI-PMH']/*[local-name()='error']");
if (errorNode != null) {
final String code = errorNode.valueOf("@code");
if ("noRecordsMatch".equalsIgnoreCase(code.trim())) {
@ -152,12 +169,11 @@ public class OaiIterator implements Iterator<String> {
}
}
for (final Object o : doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
for (final Object o :
doc.selectNodes("//*[local-name()='ListRecords']/*[local-name()='record']")) {
queue.add(((Node) o).asXML());
}
return doc.valueOf("//*[local-name()='resumptionToken']");
}
}

View File

@ -1,25 +1,23 @@
package eu.dnetlib.dhp.collection.plugin.oai;
import eu.dnetlib.dhp.collection.worker.utils.HttpConnector;
import java.util.Iterator;
public class OaiIteratorFactory {
private HttpConnector httpConnector;
public Iterator<String> newIterator(final String baseUrl, final String mdFormat, final String set, final String fromDate, final String untilDate) {
public Iterator<String> newIterator(
final String baseUrl,
final String mdFormat,
final String set,
final String fromDate,
final String untilDate) {
return new OaiIterator(baseUrl, mdFormat, set, fromDate, untilDate, getHttpConnector());
}
private HttpConnector getHttpConnector() {
if (httpConnector== null)
httpConnector = new HttpConnector();
if (httpConnector == null) httpConnector = new HttpConnector();
return httpConnector;
}
}

View File

@ -2,16 +2,18 @@ package eu.dnetlib.dhp.collection.worker;
public class DnetCollectorException extends Exception {
/**
*
*/
/** */
private static final long serialVersionUID = -290723075076039757L;
public DnetCollectorException() {
super();
}
public DnetCollectorException(final String message, final Throwable cause, final boolean enableSuppression, final boolean writableStackTrace) {
public DnetCollectorException(
final String message,
final Throwable cause,
final boolean enableSuppression,
final boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}
@ -26,5 +28,4 @@ public class DnetCollectorException extends Exception {
public DnetCollectorException(final Throwable cause) {
super(cause);
}
}

View File

@ -2,13 +2,17 @@ package eu.dnetlib.dhp.collection.worker;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.collector.worker.model.ApiDescriptor;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -18,37 +22,34 @@ import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
public class DnetCollectorWorker {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorker.class);
private final CollectorPluginFactory collectorPluginFactory;
private final ArgumentApplicationParser argumentParser;
private final MessageManager manager;
public DnetCollectorWorker(final CollectorPluginFactory collectorPluginFactory, final ArgumentApplicationParser argumentParser, final MessageManager manager) throws DnetCollectorException {
public DnetCollectorWorker(
final CollectorPluginFactory collectorPluginFactory,
final ArgumentApplicationParser argumentParser,
final MessageManager manager)
throws DnetCollectorException {
this.collectorPluginFactory = collectorPluginFactory;
this.argumentParser = argumentParser;
this.manager = manager;
}
public void collect() throws DnetCollectorException {
try {
final ObjectMapper jsonMapper = new ObjectMapper();
final ApiDescriptor api = jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
final ApiDescriptor api =
jsonMapper.readValue(argumentParser.get("apidescriptor"), ApiDescriptor.class);
final CollectorPlugin plugin = collectorPluginFactory.getPluginByProtocol(api.getProtocol());
final CollectorPlugin plugin =
collectorPluginFactory.getPluginByProtocol(api.getProtocol());
final String hdfsuri = argumentParser.get("namenode");
@ -62,7 +63,7 @@ public class DnetCollectorWorker {
System.setProperty("HADOOP_USER_NAME", argumentParser.get("userHDFS"));
System.setProperty("hadoop.home.dir", "/");
//Get the filesystem - HDFS
// Get the filesystem - HDFS
FileSystem.get(URI.create(hdfsuri), conf);
Path hdfswritepath = new Path(argumentParser.get("hdfsPath"));
@ -71,19 +72,35 @@ public class DnetCollectorWorker {
final Map<String, String> ongoingMap = new HashMap<>();
final Map<String, String> reportMap = new HashMap<>();
final AtomicInteger counter = new AtomicInteger(0);
try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
SequenceFile.Writer.file(hdfswritepath), SequenceFile.Writer.keyClass(IntWritable.class),
try (SequenceFile.Writer writer =
SequenceFile.createWriter(
conf,
SequenceFile.Writer.file(hdfswritepath),
SequenceFile.Writer.keyClass(IntWritable.class),
SequenceFile.Writer.valueClass(Text.class))) {
final IntWritable key = new IntWritable(counter.get());
final Text value = new Text();
plugin.collect(api).forEach(content -> {
plugin.collect(api)
.forEach(
content -> {
key.set(counter.getAndIncrement());
value.set(content);
if (counter.get() % 10 == 0) {
try {
ongoingMap.put("ongoing", "" + counter.get());
log.debug("Sending message: "+ manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.get("rabbitOngoingQueue"), true, false));
log.debug(
"Sending message: "
+ manager.sendMessage(
new Message(
argumentParser.get(
"workflowId"),
"Collection",
MessageType.ONGOING,
ongoingMap),
argumentParser.get(
"rabbitOngoingQueue"),
true,
false));
} catch (Exception e) {
log.error("Error on sending message ", e);
}
@ -93,21 +110,31 @@ public class DnetCollectorWorker {
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
ongoingMap.put("ongoing", "" + counter.get());
manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.ONGOING, ongoingMap), argumentParser.get("rabbitOngoingQueue"), true, false);
manager.sendMessage(
new Message(
argumentParser.get("workflowId"),
"Collection",
MessageType.ONGOING,
ongoingMap),
argumentParser.get("rabbitOngoingQueue"),
true,
false);
reportMap.put("collected", "" + counter.get());
manager.sendMessage(new Message(argumentParser.get("workflowId"), "Collection", MessageType.REPORT, reportMap), argumentParser.get("rabbitOngoingQueue"), true, false);
manager.sendMessage(
new Message(
argumentParser.get("workflowId"),
"Collection",
MessageType.REPORT,
reportMap),
argumentParser.get("rabbitOngoingQueue"),
true,
false);
manager.close();
} catch (Throwable e) {
throw new DnetCollectorException("Error on collecting ",e);
throw new DnetCollectorException("Error on collecting ", e);
}
}
}

View File

@ -1,6 +1,5 @@
package eu.dnetlib.dhp.collection.worker;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.worker.utils.CollectorPluginFactory;
import eu.dnetlib.message.MessageManager;
@ -8,16 +7,13 @@ import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* DnetCollectortWorkerApplication is the main class responsible to start
* the Dnet Collection into HDFS.
* This module will be executed on the hadoop cluster and taking in input some parameters
* that tells it which is the right collector plugin to use and where store the data into HDFS path
* DnetCollectortWorkerApplication is the main class responsible to start the Dnet Collection into
* HDFS. This module will be executed on the hadoop cluster and taking in input some parameters that
* tells it which is the right collector plugin to use and where store the data into HDFS path
*
* @author Sandro La Bruzzo
*/
public class DnetCollectorWorkerApplication {
private static final Logger log = LoggerFactory.getLogger(DnetCollectorWorkerApplication.class);
@ -26,22 +22,27 @@ public class DnetCollectorWorkerApplication {
private static ArgumentApplicationParser argumentParser;
/**
* @param args
*/
/** @param args */
public static void main(final String[] args) throws Exception {
argumentParser= new ArgumentApplicationParser(IOUtils.toString(DnetCollectorWorker.class.getResourceAsStream("/eu/dnetlib/collector/worker/collector_parameter.json")));
argumentParser =
new ArgumentApplicationParser(
IOUtils.toString(
DnetCollectorWorker.class.getResourceAsStream(
"/eu/dnetlib/collector/worker/collector_parameter.json")));
argumentParser.parseArgument(args);
log.info("hdfsPath =" + argumentParser.get("hdfsPath"));
log.info("json = " + argumentParser.get("apidescriptor"));
final MessageManager manager = new MessageManager(argumentParser.get("rabbitHost"), argumentParser.get("rabbitUser"), argumentParser.get("rabbitPassword"), false, false, null);
final DnetCollectorWorker worker = new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
final MessageManager manager =
new MessageManager(
argumentParser.get("rabbitHost"),
argumentParser.get("rabbitUser"),
argumentParser.get("rabbitPassword"),
false,
false,
null);
final DnetCollectorWorker worker =
new DnetCollectorWorker(collectorPluginFactory, argumentParser, manager);
worker.collect();
}
}

View File

@ -15,5 +15,4 @@ public class CollectorPluginErrorLogList extends LinkedList<String> {
}
return log;
}
}

View File

@ -4,19 +4,16 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
;
public class CollectorPluginFactory {
public CollectorPlugin getPluginByProtocol(final String protocol) throws DnetCollectorException {
if (protocol==null) throw new DnetCollectorException("protocol cannot be null");
switch (protocol.toLowerCase().trim()){
public CollectorPlugin getPluginByProtocol(final String protocol)
throws DnetCollectorException {
if (protocol == null) throw new DnetCollectorException("protocol cannot be null");
switch (protocol.toLowerCase().trim()) {
case "oai":
return new OaiCollectorPlugin();
default:
throw new DnetCollectorException("UNknown protocol");
}
}
}

View File

@ -1,15 +1,6 @@
package eu.dnetlib.dhp.collection.worker.utils;
import eu.dnetlib.dhp.collection.worker.DnetCollectorException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.IOException;
import java.io.InputStream;
import java.net.*;
@ -17,7 +8,14 @@ import java.security.GeneralSecurityException;
import java.security.cert.X509Certificate;
import java.util.List;
import java.util.Map;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class HttpConnector {
@ -38,11 +36,9 @@ public class HttpConnector {
/**
* Given the URL returns the content via HTTP GET
*
* @param requestUrl
* the URL
* @param requestUrl the URL
* @return the content of the downloaded resource
* @throws DnetCollectorException
* when retrying more than maxNumberOfRetry times
* @throws DnetCollectorException when retrying more than maxNumberOfRetry times
*/
public String getInputSource(final String requestUrl) throws DnetCollectorException {
return attemptDownlaodAsString(requestUrl, 1, new CollectorPluginErrorLogList());
@ -51,17 +47,19 @@ public class HttpConnector {
/**
* Given the URL returns the content as a stream via HTTP GET
*
* @param requestUrl
* the URL
* @param requestUrl the URL
* @return the content of the downloaded resource as InputStream
* @throws DnetCollectorException
* when retrying more than maxNumberOfRetry times
* @throws DnetCollectorException when retrying more than maxNumberOfRetry times
*/
public InputStream getInputSourceAsStream(final String requestUrl) throws DnetCollectorException {
public InputStream getInputSourceAsStream(final String requestUrl)
throws DnetCollectorException {
return attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
}
private String attemptDownlaodAsString(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
private String attemptDownlaodAsString(
final String requestUrl,
final int retryNumber,
final CollectorPluginErrorLogList errorList)
throws DnetCollectorException {
try {
final InputStream s = attemptDownload(requestUrl, 1, new CollectorPluginErrorLogList());
@ -80,17 +78,24 @@ public class HttpConnector {
}
}
private InputStream attemptDownload(final String requestUrl, final int retryNumber, final CollectorPluginErrorLogList errorList)
private InputStream attemptDownload(
final String requestUrl,
final int retryNumber,
final CollectorPluginErrorLogList errorList)
throws DnetCollectorException {
if (retryNumber > maxNumberOfRetry) { throw new DnetCollectorException("Max number of retries exceeded. Cause: \n " + errorList); }
if (retryNumber > maxNumberOfRetry) {
throw new DnetCollectorException(
"Max number of retries exceeded. Cause: \n " + errorList);
}
log.debug("Downloading " + requestUrl + " - try: " + retryNumber);
try {
InputStream input = null;
try {
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
final HttpURLConnection urlConn =
(HttpURLConnection) new URL(requestUrl).openConnection();
urlConn.setInstanceFollowRedirects(false);
urlConn.setReadTimeout(readTimeOut * 1000);
urlConn.addRequestProperty("User-Agent", userAgent);
@ -100,22 +105,35 @@ public class HttpConnector {
}
final int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
if (retryAfter > 0 && urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
if (retryAfter > 0
&& urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
log.warn("waiting and repeating request after " + retryAfter + " sec.");
Thread.sleep(retryAfter * 1000);
errorList.add("503 Service Unavailable");
urlConn.disconnect();
return attemptDownload(requestUrl, retryNumber + 1, errorList);
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM || urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
} else if (urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM
|| urlConn.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP) {
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.debug("The requested url has been moved to " + newUrl);
errorList.add(String.format("%s %s. Moved to: %s", urlConn.getResponseCode(), urlConn.getResponseMessage(), newUrl));
errorList.add(
String.format(
"%s %s. Moved to: %s",
urlConn.getResponseCode(),
urlConn.getResponseMessage(),
newUrl));
urlConn.disconnect();
return attemptDownload(newUrl, retryNumber + 1, errorList);
} else if (urlConn.getResponseCode() != HttpURLConnection.HTTP_OK) {
log.error(String.format("HTTP error: %s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
log.error(
String.format(
"HTTP error: %s %s",
urlConn.getResponseCode(), urlConn.getResponseMessage()));
Thread.sleep(defaultDelay * 1000);
errorList.add(String.format("%s %s", urlConn.getResponseCode(), urlConn.getResponseMessage()));
errorList.add(
String.format(
"%s %s",
urlConn.getResponseCode(), urlConn.getResponseMessage()));
urlConn.disconnect();
return attemptDownload(requestUrl, retryNumber + 1, errorList);
} else {
@ -148,30 +166,44 @@ public class HttpConnector {
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (final String key : headerMap.keySet()) {
if (key != null && key.toLowerCase().equals("retry-after") && headerMap.get(key).size() > 0
&& NumberUtils.isNumber(headerMap.get(key).get(0))) { return Integer.parseInt(headerMap.get(key).get(0)) + 10; }
if (key != null
&& key.toLowerCase().equals("retry-after")
&& headerMap.get(key).size() > 0
&& NumberUtils.isNumber(headerMap.get(key).get(0))) {
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
}
}
return -1;
}
private String obtainNewLocation(final Map<String, List<String>> headerMap) throws DnetCollectorException {
private String obtainNewLocation(final Map<String, List<String>> headerMap)
throws DnetCollectorException {
for (final String key : headerMap.keySet()) {
if (key != null && key.toLowerCase().equals("location") && headerMap.get(key).size() > 0) { return headerMap.get(key).get(0); }
if (key != null
&& key.toLowerCase().equals("location")
&& headerMap.get(key).size() > 0) {
return headerMap.get(key).get(0);
}
throw new DnetCollectorException("The requested url has been MOVED, but 'location' param is MISSING");
}
throw new DnetCollectorException(
"The requested url has been MOVED, but 'location' param is MISSING");
}
/**
* register for https scheme; this is a workaround and not intended for the use in trusted environments
* register for https scheme; this is a workaround and not intended for the use in trusted
* environments
*/
public void initTrustManager() {
final X509TrustManager tm = new X509TrustManager() {
final X509TrustManager tm =
new X509TrustManager() {
@Override
public void checkClientTrusted(final X509Certificate[] xcs, final String string) {}
public void checkClientTrusted(
final X509Certificate[] xcs, final String string) {}
@Override
public void checkServerTrusted(final X509Certificate[] xcs, final String string) {}
public void checkServerTrusted(
final X509Certificate[] xcs, final String string) {}
@Override
public X509Certificate[] getAcceptedIssuers() {
@ -180,7 +212,7 @@ public class HttpConnector {
};
try {
final SSLContext ctx = SSLContext.getInstance("TLS");
ctx.init(null, new TrustManager[] { tm }, null);
ctx.init(null, new TrustManager[] {tm}, null);
HttpsURLConnection.setDefaultSSLSocketFactory(ctx.getSocketFactory());
} catch (final GeneralSecurityException e) {
log.fatal(e);
@ -215,5 +247,4 @@ public class HttpConnector {
public String getResponseType() {
return responseType;
}
}

View File

@ -6,26 +6,24 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
/**
* @author jochen, Andreas Czerniak
*
*/
/** @author jochen, Andreas Czerniak */
public class XmlCleaner {
/**
* Pattern for numeric entities.
*/
private static Pattern validCharacterEntityPattern = Pattern.compile("^&#x?\\d{2,4};"); //$NON-NLS-1$
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};"); //$NON-NLS-1$
/** Pattern for numeric entities. */
private static Pattern validCharacterEntityPattern =
Pattern.compile("^&#x?\\d{2,4};"); // $NON-NLS-1$
// private static Pattern validCharacterEntityPattern = Pattern.compile("^&#?\\d{2,4};");
// //$NON-NLS-1$
// see https://www.w3.org/TR/REC-xml/#charsets , not only limited to &#11;
private static Pattern invalidControlCharPattern = Pattern.compile("&#x?1[0-9a-fA-F];");
/**
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
* [#x10000-#x10FFFF]
* Pattern that negates the allowable XML 4 byte unicode characters. Valid are: #x9 | #xA | #xD
* | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
*/
private static Pattern invalidCharacterPattern = Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); //$NON-NLS-1$
private static Pattern invalidCharacterPattern =
Pattern.compile("[^\t\r\n\u0020-\uD7FF\uE000-\uFFFD]"); // $NON-NLS-1$
// Map entities to their unicode equivalent
private static Set<String> goodEntities = new HashSet<>();
@ -33,159 +31,289 @@ public class XmlCleaner {
static {
// pre-defined XML entities
goodEntities.add("&quot;"); //$NON-NLS-1$ // quotation mark
goodEntities.add("&amp;"); //$NON-NLS-1$ // ampersand
goodEntities.add("&lt;"); //$NON-NLS-1$ // less-than sign
goodEntities.add("&gt;"); //$NON-NLS-1$ // greater-than sign
goodEntities.add("&quot;"); // $NON-NLS-1$ // quotation mark
goodEntities.add("&amp;"); // $NON-NLS-1$ // ampersand
goodEntities.add("&lt;"); // $NON-NLS-1$ // less-than sign
goodEntities.add("&gt;"); // $NON-NLS-1$ // greater-than sign
// control entities
// badEntities.put("&#11;", "");
badEntities.put("&#127;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#128;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#129;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#130;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#131;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#132;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#133;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#134;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#135;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#136;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#137;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#138;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#139;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#140;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#141;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#142;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#143;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#144;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#145;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#146;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#147;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#148;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#149;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#150;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#151;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#152;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#153;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#154;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#155;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#156;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#157;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#158;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#159;", " "); //$NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#127;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#128;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#129;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#130;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#131;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#132;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#133;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#134;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#135;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#136;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#137;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#138;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#139;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#140;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#141;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#142;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#143;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#144;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#145;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#146;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#147;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#148;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#149;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#150;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#151;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#152;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#153;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#154;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#155;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#156;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#157;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#158;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
badEntities.put("&#159;", " "); // $NON-NLS-1$ //$NON-NLS-2$ // illegal HTML character
// misc entities
badEntities.put("&euro;", "\u20AC"); //$NON-NLS-1$ //$NON-NLS-2$ // euro
badEntities.put("&lsquo;", "\u2018"); //$NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
badEntities.put("&rsquo;", "\u2019"); //$NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
badEntities.put("&euro;", "\u20AC"); // $NON-NLS-1$ //$NON-NLS-2$ // euro
badEntities.put(
"&lsquo;", "\u2018"); // $NON-NLS-1$ //$NON-NLS-2$ // left single quotation mark
badEntities.put(
"&rsquo;", "\u2019"); // $NON-NLS-1$ //$NON-NLS-2$ // right single quotation mark
// Latin 1 entities
badEntities.put("&nbsp;", "\u00A0"); //$NON-NLS-1$ //$NON-NLS-2$ // no-break space
badEntities.put("&iexcl;", "\u00A1"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
badEntities.put("&cent;", "\u00A2"); //$NON-NLS-1$ //$NON-NLS-2$ // cent sign
badEntities.put("&pound;", "\u00A3"); //$NON-NLS-1$ //$NON-NLS-2$ // pound sign
badEntities.put("&curren;", "\u00A4"); //$NON-NLS-1$ //$NON-NLS-2$ // currency sign
badEntities.put("&yen;", "\u00A5"); //$NON-NLS-1$ //$NON-NLS-2$ // yen sign
badEntities.put("&brvbar;", "\u00A6"); //$NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
badEntities.put("&sect;", "\u00A7"); //$NON-NLS-1$ //$NON-NLS-2$ // section sign
badEntities.put("&uml;", "\u00A8"); //$NON-NLS-1$ //$NON-NLS-2$ // diaeresis
badEntities.put("&copy;", "\u00A9"); //$NON-NLS-1$ //$NON-NLS-2$ // copyright sign
badEntities.put("&ordf;", "\u00AA"); //$NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
badEntities.put("&laquo;", "\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
badEntities.put("&not;", "\u00AC"); //$NON-NLS-1$ //$NON-NLS-2$ // not sign
badEntities.put("&shy;", "\u00AD"); //$NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
badEntities.put("&reg;", "\u00AE"); //$NON-NLS-1$ //$NON-NLS-2$ // registered sign
badEntities.put("&macr;", "\u00AF"); //$NON-NLS-1$ //$NON-NLS-2$ // macron
badEntities.put("&deg;", "\u00B0"); //$NON-NLS-1$ //$NON-NLS-2$ // degree sign
badEntities.put("&plusmn;", "\u00B1"); //$NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
badEntities.put("&sup2;", "\u00B2"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript two
badEntities.put("&sup3;", "\u00B3"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript three
badEntities.put("&acute;", "\u00B4"); //$NON-NLS-1$ //$NON-NLS-2$ // acute accent
badEntities.put("&micro;", "\u00B5"); //$NON-NLS-1$ //$NON-NLS-2$ // micro sign
badEntities.put("&para;", "\u00B6"); //$NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
badEntities.put("&middot;", "\u00B7"); //$NON-NLS-1$ //$NON-NLS-2$ // middle dot
badEntities.put("&cedil;", "\u00B8"); //$NON-NLS-1$ //$NON-NLS-2$ // cedilla
badEntities.put("&sup1;", "\u00B9"); //$NON-NLS-1$ //$NON-NLS-2$ // superscript one
badEntities.put("&ordm;", "\u00BA"); //$NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
badEntities.put("&raquo;", "\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
badEntities.put("&frac14;", "\u00BC"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
badEntities.put("&frac12;", "\u00BD"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
badEntities.put("&frac34;", "\u00BE"); //$NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
badEntities.put("&iquest;", "\u00BF"); //$NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
badEntities.put("&Agrave;", "\u00C0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
badEntities.put("&Aacute;", "\u00C1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
badEntities.put("&Acirc;", "\u00C2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
badEntities.put("&Atilde;", "\u00C3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
badEntities.put("&Auml;", "\u00C4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
badEntities.put("&Aring;", "\u00C5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
badEntities.put("&AElig;", "\u00C6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
badEntities.put("&Ccedil;", "\u00C7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
badEntities.put("&Egrave;", "\u00C8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
badEntities.put("&Eacute;", "\u00C9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
badEntities.put("&Ecirc;", "\u00CA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
badEntities.put("&Euml;", "\u00CB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
badEntities.put("&Igrave;", "\u00CC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
badEntities.put("&Iacute;", "\u00CD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
badEntities.put("&Icirc;", "\u00CE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
badEntities.put("&Iuml;", "\u00CF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
badEntities.put("&ETH;", "\u00D0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
badEntities.put("&Ntilde;", "\u00D1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
badEntities.put("&Ograve;", "\u00D2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
badEntities.put("&Oacute;", "\u00D3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
badEntities.put("&Ocirc;", "\u00D4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
badEntities.put("&Otilde;", "\u00D5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
badEntities.put("&Ouml;", "\u00D6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
badEntities.put("&times;", "\u00D7"); //$NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
badEntities.put("&Oslash;", "\u00D8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
badEntities.put("&Ugrave;", "\u00D9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
badEntities.put("&Uacute;", "\u00DA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
badEntities.put("&Ucirc;", "\u00DB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
badEntities.put("&Uuml;", "\u00DC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
badEntities.put("&Yacute;", "\u00DD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
badEntities.put("&THORN;", "\u00DE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
badEntities.put("&szlig;", "\u00DF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
badEntities.put("&agrave;", "\u00E0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
badEntities.put("&aacute;", "\u00E1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
badEntities.put("&acirc;", "\u00E2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
badEntities.put("&atilde;", "\u00E3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
badEntities.put("&auml;", "\u00E4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
badEntities.put("&aring;", "\u00E5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
badEntities.put("&aelig;", "\u00E6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
badEntities.put("&ccedil;", "\u00E7"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
badEntities.put("&egrave;", "\u00E8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
badEntities.put("&eacute;", "\u00E9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
badEntities.put("&ecirc;", "\u00EA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
badEntities.put("&euml;", "\u00EB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
badEntities.put("&igrave;", "\u00EC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
badEntities.put("&iacute;", "\u00ED"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
badEntities.put("&icirc;", "\u00EE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
badEntities.put("&iuml;", "\u00EF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
badEntities.put("&eth;", "\u00F0"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
badEntities.put("&ntilde;", "\u00F1"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
badEntities.put("&ograve;", "\u00F2"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
badEntities.put("&oacute;", "\u00F3"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
badEntities.put("&ocirc;", "\u00F4"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
badEntities.put("&otilde;", "\u00F5"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
badEntities.put("&ouml;", "\u00F6"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
badEntities.put("&divide;", "\u00F7"); //$NON-NLS-1$ //$NON-NLS-2$ // division sign
badEntities.put("&oslash;", "\u00F8"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
badEntities.put("&ugrave;", "\u00F9"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
badEntities.put("&uacute;", "\u00FA"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
badEntities.put("&ucirc;", "\u00FB"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
badEntities.put("&uuml;", "\u00FC"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
badEntities.put("&yacute;", "\u00FD"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
badEntities.put("&thorn;", "\u00FE"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
badEntities.put("&yuml;", "\u00FF"); //$NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
badEntities.put("&nbsp;", "\u00A0"); // $NON-NLS-1$ //$NON-NLS-2$ // no-break space
badEntities.put(
"&iexcl;", "\u00A1"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted exclamation mark
badEntities.put("&cent;", "\u00A2"); // $NON-NLS-1$ //$NON-NLS-2$ // cent sign
badEntities.put("&pound;", "\u00A3"); // $NON-NLS-1$ //$NON-NLS-2$ // pound sign
badEntities.put("&curren;", "\u00A4"); // $NON-NLS-1$ //$NON-NLS-2$ // currency sign
badEntities.put("&yen;", "\u00A5"); // $NON-NLS-1$ //$NON-NLS-2$ // yen sign
badEntities.put("&brvbar;", "\u00A6"); // $NON-NLS-1$ //$NON-NLS-2$ // broken vertical bar
badEntities.put("&sect;", "\u00A7"); // $NON-NLS-1$ //$NON-NLS-2$ // section sign
badEntities.put("&uml;", "\u00A8"); // $NON-NLS-1$ //$NON-NLS-2$ // diaeresis
badEntities.put("&copy;", "\u00A9"); // $NON-NLS-1$ //$NON-NLS-2$ // copyright sign
badEntities.put(
"&ordf;", "\u00AA"); // $NON-NLS-1$ //$NON-NLS-2$ // feminine ordinal indicator
badEntities.put(
"&laquo;",
"\u00AB"); //$NON-NLS-1$ //$NON-NLS-2$ // left-pointing double angle quotation mark
badEntities.put("&not;", "\u00AC"); // $NON-NLS-1$ //$NON-NLS-2$ // not sign
badEntities.put("&shy;", "\u00AD"); // $NON-NLS-1$ //$NON-NLS-2$ // soft hyphen
badEntities.put("&reg;", "\u00AE"); // $NON-NLS-1$ //$NON-NLS-2$ // registered sign
badEntities.put("&macr;", "\u00AF"); // $NON-NLS-1$ //$NON-NLS-2$ // macron
badEntities.put("&deg;", "\u00B0"); // $NON-NLS-1$ //$NON-NLS-2$ // degree sign
badEntities.put("&plusmn;", "\u00B1"); // $NON-NLS-1$ //$NON-NLS-2$ // plus-minus sign
badEntities.put("&sup2;", "\u00B2"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript two
badEntities.put("&sup3;", "\u00B3"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript three
badEntities.put("&acute;", "\u00B4"); // $NON-NLS-1$ //$NON-NLS-2$ // acute accent
badEntities.put("&micro;", "\u00B5"); // $NON-NLS-1$ //$NON-NLS-2$ // micro sign
badEntities.put("&para;", "\u00B6"); // $NON-NLS-1$ //$NON-NLS-2$ // pilcrow sign
badEntities.put("&middot;", "\u00B7"); // $NON-NLS-1$ //$NON-NLS-2$ // middle dot
badEntities.put("&cedil;", "\u00B8"); // $NON-NLS-1$ //$NON-NLS-2$ // cedilla
badEntities.put("&sup1;", "\u00B9"); // $NON-NLS-1$ //$NON-NLS-2$ // superscript one
badEntities.put(
"&ordm;", "\u00BA"); // $NON-NLS-1$ //$NON-NLS-2$ // masculine ordinal indicator
badEntities.put(
"&raquo;",
"\u00BB"); //$NON-NLS-1$ //$NON-NLS-2$ // right-pointing double angle quotation mark
badEntities.put(
"&frac14;", "\u00BC"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one quarter
badEntities.put(
"&frac12;", "\u00BD"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction one half
badEntities.put(
"&frac34;",
"\u00BE"); // $NON-NLS-1$ //$NON-NLS-2$ // vulgar fraction three quarters
badEntities.put(
"&iquest;", "\u00BF"); // $NON-NLS-1$ //$NON-NLS-2$ // inverted question mark
badEntities.put(
"&Agrave;",
"\u00C0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with grave
badEntities.put(
"&Aacute;",
"\u00C1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with acute
badEntities.put(
"&Acirc;",
"\u00C2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with circumflex
badEntities.put(
"&Atilde;",
"\u00C3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with tilde
badEntities.put(
"&Auml;",
"\u00C4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with diaeresis
badEntities.put(
"&Aring;",
"\u00C5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter A with ring above
badEntities.put(
"&AElig;", "\u00C6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter AE
badEntities.put(
"&Ccedil;",
"\u00C7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter C with cedilla
badEntities.put(
"&Egrave;",
"\u00C8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with grave
badEntities.put(
"&Eacute;",
"\u00C9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with acute
badEntities.put(
"&Ecirc;",
"\u00CA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with circumflex
badEntities.put(
"&Euml;",
"\u00CB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter E with diaeresis
badEntities.put(
"&Igrave;",
"\u00CC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with grave
badEntities.put(
"&Iacute;",
"\u00CD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with acute
badEntities.put(
"&Icirc;",
"\u00CE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with circumflex
badEntities.put(
"&Iuml;",
"\u00CF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter I with diaeresis
badEntities.put("&ETH;", "\u00D0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter ETH
badEntities.put(
"&Ntilde;",
"\u00D1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter N with tilde
badEntities.put(
"&Ograve;",
"\u00D2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with grave
badEntities.put(
"&Oacute;",
"\u00D3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with acute
badEntities.put(
"&Ocirc;",
"\u00D4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with circumflex
badEntities.put(
"&Otilde;",
"\u00D5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with tilde
badEntities.put(
"&Ouml;",
"\u00D6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with diaeresis
badEntities.put("&times;", "\u00D7"); // $NON-NLS-1$ //$NON-NLS-2$ // multiplication sign
badEntities.put(
"&Oslash;",
"\u00D8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter O with stroke
badEntities.put(
"&Ugrave;",
"\u00D9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with grave
badEntities.put(
"&Uacute;",
"\u00DA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with acute
badEntities.put(
"&Ucirc;",
"\u00DB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with circumflex
badEntities.put(
"&Uuml;",
"\u00DC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter U with diaeresis
badEntities.put(
"&Yacute;",
"\u00DD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter Y with acute
badEntities.put(
"&THORN;", "\u00DE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin capital letter THORN
badEntities.put(
"&szlig;", "\u00DF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter sharp s
badEntities.put(
"&agrave;",
"\u00E0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with grave
badEntities.put(
"&aacute;",
"\u00E1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with acute
badEntities.put(
"&acirc;",
"\u00E2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with circumflex
badEntities.put(
"&atilde;",
"\u00E3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with tilde
badEntities.put(
"&auml;",
"\u00E4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with diaeresis
badEntities.put(
"&aring;",
"\u00E5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter a with ring above
badEntities.put("&aelig;", "\u00E6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter ae
badEntities.put(
"&ccedil;",
"\u00E7"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter c with cedilla
badEntities.put(
"&egrave;",
"\u00E8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with grave
badEntities.put(
"&eacute;",
"\u00E9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with acute
badEntities.put(
"&ecirc;",
"\u00EA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with circumflex
badEntities.put(
"&euml;",
"\u00EB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter e with diaeresis
badEntities.put(
"&igrave;",
"\u00EC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with grave
badEntities.put(
"&iacute;",
"\u00ED"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with acute
badEntities.put(
"&icirc;",
"\u00EE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with circumflex
badEntities.put(
"&iuml;",
"\u00EF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter i with diaeresis
badEntities.put("&eth;", "\u00F0"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter eth
badEntities.put(
"&ntilde;",
"\u00F1"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter n with tilde
badEntities.put(
"&ograve;",
"\u00F2"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with grave
badEntities.put(
"&oacute;",
"\u00F3"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with acute
badEntities.put(
"&ocirc;",
"\u00F4"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with circumflex
badEntities.put(
"&otilde;",
"\u00F5"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with tilde
badEntities.put(
"&ouml;",
"\u00F6"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with diaeresis
badEntities.put("&divide;", "\u00F7"); // $NON-NLS-1$ //$NON-NLS-2$ // division sign
badEntities.put(
"&oslash;",
"\u00F8"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter o with stroke
badEntities.put(
"&ugrave;",
"\u00F9"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with grave
badEntities.put(
"&uacute;",
"\u00FA"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with acute
badEntities.put(
"&ucirc;",
"\u00FB"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with circumflex
badEntities.put(
"&uuml;",
"\u00FC"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter u with diaeresis
badEntities.put(
"&yacute;",
"\u00FD"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with acute
badEntities.put(
"&thorn;", "\u00FE"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter thorn
badEntities.put(
"&yuml;",
"\u00FF"); // $NON-NLS-1$ //$NON-NLS-2$ // latin small letter y with diaeresis
}
/**
* For each entity in the input that is not allowed in XML, replace the entity with its unicode equivalent or remove it. For each
* instance of a bare {@literal &}, replace it with {@literal &amp;<br/>
* } XML only allows 4 entities: {@literal &amp;amp;}, {@literal &amp;quot;}, {@literal &amp;lt;} and {@literal &amp;gt;}.
* For each entity in the input that is not allowed in XML, replace the entity with its unicode
* equivalent or remove it. For each instance of a bare {@literal &}, replace it with {@literal
* &amp;<br/> } XML only allows 4 entities: {@literal &amp;amp;}, {@literal &amp;quot;},
* {@literal &amp;lt;} and {@literal &amp;gt;}.
*
* @param broken
* the string to handle entities
* @param broken the string to handle entities
* @return the string with entities appropriately fixed up
*/
static public String cleanAllEntities(final String broken) {
if (broken == null) { return null; }
public static String cleanAllEntities(final String broken) {
if (broken == null) {
return null;
}
String working = invalidControlCharPattern.matcher(broken).replaceAll("");
working = invalidCharacterPattern.matcher(working).replaceAll("");
@ -206,7 +334,10 @@ public class XmlCleaner {
int i = amp + 1;
while (true) {
// if we are at the end of the string then just escape the '&';
if (i >= working.length()) { return working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$
if (i >= working.length()) {
return working.substring(0, amp)
+ "&amp;"
+ working.substring(amp + 1); // $NON-NLS-1$
}
// if we have come to a ; then we have an entity
// If it is something that xml can't handle then replace it.
@ -220,7 +351,10 @@ public class XmlCleaner {
// Did we end an entity without finding a closing ;
// Then treat it as an '&' that needs to be replaced with &amp;
if (!Character.isLetterOrDigit(c)) {
working = working.substring(0, amp) + "&amp;" + working.substring(amp + 1); //$NON-NLS-1$
working =
working.substring(0, amp)
+ "&amp;"
+ working.substring(amp + 1); // $NON-NLS-1$
amp = i + 4; // account for the 4 extra characters
break;
}
@ -241,18 +375,22 @@ public class XmlCleaner {
}
/**
* Replace entity with its unicode equivalent, if it is not a valid XML entity. Otherwise strip it out. XML only allows 4 entities:
* &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;.
* Replace entity with its unicode equivalent, if it is not a valid XML entity. Otherwise strip
* it out. XML only allows 4 entities: &amp;amp;, &amp;quot;, &amp;lt; and &amp;gt;.
*
* @param entity
* the entity to be replaced
* @return the substitution for the entity, either itself, the unicode equivalent or an empty string.
* @param entity the entity to be replaced
* @return the substitution for the entity, either itself, the unicode equivalent or an empty
* string.
*/
private static String handleEntity(final String entity) {
if (goodEntities.contains(entity)) { return entity; }
if (goodEntities.contains(entity)) {
return entity;
}
final String replace = badEntities.get(entity);
if (replace != null) { return replace; }
if (replace != null) {
return replace;
}
return replace != null ? replace : "";
}

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.migration.actions;
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
import java.util.Comparator;
public class LicenseComparator implements Comparator<Qualifier> {
@ -45,5 +44,4 @@ public class LicenseComparator implements Comparator<Qualifier> {
// Else (but unlikely), lexicographical ordering will do.
return lClass.compareTo(rClass);
}
}

View File

@ -6,6 +6,11 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
@ -17,12 +22,6 @@ import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.util.ToolRunner;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.stream.Collectors;
public class MigrateActionSet {
private static final Log log = LogFactory.getLog(MigrateActionSet.class);
@ -34,8 +33,10 @@ public class MigrateActionSet {
private static Boolean DEFAULT_TRANSFORM_ONLY = false;
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/migrate_actionsets_parameters.json")));
parser.parseArgument(args);
@ -63,10 +64,12 @@ public class MigrateActionSet {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
Configuration conf =
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
FileSystem targetFS = FileSystem.get(conf);
Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
Configuration sourceConf =
getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
FileSystem sourceFS = FileSystem.get(sourceConf);
@ -75,14 +78,20 @@ public class MigrateActionSet {
List<Path> targetPaths = new ArrayList<>();
final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
log.info(String.format("paths to process:\n%s", sourcePaths.stream().map(p -> p.toString()).collect(Collectors.joining("\n"))));
for(Path source : sourcePaths) {
log.info(
String.format(
"paths to process:\n%s",
sourcePaths.stream()
.map(p -> p.toString())
.collect(Collectors.joining("\n"))));
for (Path source : sourcePaths) {
if (!sourceFS.exists(source)) {
log.warn(String.format("skipping unexisting path: %s", source));
} else {
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
LinkedList<String> pathQ =
Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
final String rawSet = pathQ.pollLast();
log.info(String.format("got RAWSET: %s", rawSet));
@ -91,7 +100,14 @@ public class MigrateActionSet {
final String actionSetDirectory = pathQ.pollLast();
final Path targetPath = new Path(targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);
final Path targetPath =
new Path(
targetNN
+ workDir
+ SEPARATOR
+ actionSetDirectory
+ SEPARATOR
+ rawSet);
log.info(String.format("using TARGET PATH: %s", targetPath));
@ -99,7 +115,13 @@ public class MigrateActionSet {
if (targetFS.exists(targetPath)) {
targetFS.delete(targetPath, true);
}
runDistcp(distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
runDistcp(
distcp_num_maps,
distcp_memory_mb,
distcp_task_timeout,
conf,
source,
targetPath);
}
targetPaths.add(targetPath);
@ -107,19 +129,25 @@ public class MigrateActionSet {
}
}
props.setProperty(TARGET_PATHS, targetPaths
.stream()
.map(p -> p.toString())
.collect(Collectors.joining(",")));
props.setProperty(
TARGET_PATHS,
targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(",")));
File file = new File(System.getProperty("oozie.action.output.properties"));
try(OutputStream os = new FileOutputStream(file)) {
try (OutputStream os = new FileOutputStream(file)) {
props.store(os, "");
}
System.out.println(file.getAbsolutePath());
}
private void runDistcp(Integer distcp_num_maps, String distcp_memory_mb, String distcp_task_timeout, Configuration conf, Path source, Path targetPath) throws Exception {
private void runDistcp(
Integer distcp_num_maps,
String distcp_memory_mb,
String distcp_task_timeout,
Configuration conf,
Path source,
Path targetPath)
throws Exception {
final DistCpOptions op = new DistCpOptions(source, targetPath);
op.setMaxMaps(distcp_num_maps);
@ -127,20 +155,25 @@ public class MigrateActionSet {
op.preserve(DistCpOptions.FileAttribute.REPLICATION);
op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
int res = ToolRunner.run(new DistCp(conf, op), new String[]{
int res =
ToolRunner.run(
new DistCp(conf, op),
new String[] {
"-Dmapred.task.timeout=" + distcp_task_timeout,
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
"-pb",
"-m " + distcp_num_maps,
source.toString(),
targetPath.toString()});
targetPath.toString()
});
if (res != 0) {
throw new RuntimeException(String.format("distcp exited with code %s", res));
}
}
private Configuration getConfiguration(String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
private Configuration getConfiguration(
String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
final Configuration conf = new Configuration();
conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
@ -151,20 +184,20 @@ public class MigrateActionSet {
return conf;
}
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp) throws ISLookUpException {
String XQUERY = "distinct-values(\n" +
"let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n" +
"for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n" +
"let $setDir := $x//SET/@directory/string()\n" +
"let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n" +
"return concat($basePath, '/', $setDir, '/', $rawSet))";
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
throws ISLookUpException {
String XQUERY =
"distinct-values(\n"
+ "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
+ "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
+ "let $setDir := $x//SET/@directory/string()\n"
+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
log.info(String.format("running xquery:\n%s", XQUERY));
return isLookUp.quickSearchProfile(XQUERY)
.stream()
return isLookUp.quickSearchProfile(XQUERY).stream()
.map(p -> sourceNN + p)
.map(Path::new)
.collect(Collectors.toList());
}
}

View File

@ -4,12 +4,11 @@ import com.google.common.collect.Lists;
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.*;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.lang3.StringUtils;
import java.io.Serializable;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
public class ProtoConverter implements Serializable {
@ -42,10 +41,12 @@ public class ProtoConverter implements Serializable {
rel.setRelType(r.getRelType().toString());
rel.setSubRelType(r.getSubRelType().toString());
rel.setRelClass(r.getRelClass());
rel.setCollectedFrom(r.getCollectedfromCount() > 0 ?
r.getCollectedfromList().stream()
rel.setCollectedFrom(
r.getCollectedfromCount() > 0
? r.getCollectedfromList().stream()
.map(kv -> mapKV(kv))
.collect(Collectors.toList()) : null);
.collect(Collectors.toList())
: null);
return rel;
}
@ -71,8 +72,7 @@ public class ProtoConverter implements Serializable {
final ResultProtos.Result r = oaf.getEntity().getResult();
if (r.getInstanceCount() > 0) {
return r.getInstanceList()
.stream()
return r.getInstanceList().stream()
.map(i -> convertInstance(i))
.collect(Collectors.toList());
}
@ -96,13 +96,14 @@ public class ProtoConverter implements Serializable {
}
private static Organization convertOrganization(OafProtos.Oaf oaf) {
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
final OrganizationProtos.Organization.Metadata m =
oaf.getEntity().getOrganization().getMetadata();
final Organization org = setOaf(new Organization(), oaf);
setEntity(org, oaf);
org.setLegalshortname(mapStringField(m.getLegalshortname()));
org.setLegalname(mapStringField(m.getLegalname()));
org.setAlternativeNames(m.getAlternativeNamesList().
stream()
org.setAlternativeNames(
m.getAlternativeNamesList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
@ -112,7 +113,8 @@ public class ProtoConverter implements Serializable {
org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
org.setEchighereducation(mapStringField(m.getEchighereducation()));
org.setEcinternationalorganizationeurinterests(mapStringField(m.getEcinternationalorganizationeurinterests()));
org.setEcinternationalorganizationeurinterests(
mapStringField(m.getEcinternationalorganizationeurinterests()));
org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
org.setEcenterprise(mapStringField(m.getEcenterprise()));
org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
@ -123,11 +125,12 @@ public class ProtoConverter implements Serializable {
}
private static Datasource convertDataSource(OafProtos.Oaf oaf) {
final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
final DatasourceProtos.Datasource.Metadata m =
oaf.getEntity().getDatasource().getMetadata();
final Datasource datasource = setOaf(new Datasource(), oaf);
setEntity(datasource, oaf);
datasource.setAccessinfopackage(m.getAccessinfopackageList()
.stream()
datasource.setAccessinfopackage(
m.getAccessinfopackageList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setCertificates(mapStringField(m.getCertificates()));
@ -148,12 +151,12 @@ public class ProtoConverter implements Serializable {
datasource.setLogourl(mapStringField(m.getLogourl()));
datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
datasource.setOdcontenttypes(m.getOdcontenttypesList()
.stream()
datasource.setOdcontenttypes(
m.getOdcontenttypesList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setOdlanguages(m.getOdlanguagesList()
.stream()
datasource.setOdlanguages(
m.getOdlanguagesList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
@ -162,23 +165,22 @@ public class ProtoConverter implements Serializable {
datasource.setOfficialname(mapStringField(m.getOfficialname()));
datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
datasource.setPidsystems(mapStringField(m.getPidsystems()));
datasource.setPolicies(m.getPoliciesList()
.stream()
datasource.setPolicies(
m.getPoliciesList().stream()
.map(ProtoConverter::mapKV)
.collect(Collectors.toList()));
datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
datasource.setSubjects(m.getSubjectsList()
.stream()
datasource.setSubjects(
m.getSubjectsList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
datasource.setVersioning(mapBoolField(m.getVersioning()));
datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
datasource.setJournal(mapJournal(m.getJournal()));
return datasource;
}
@ -204,12 +206,14 @@ public class ProtoConverter implements Serializable {
project.setFundedamount(m.getFundedamount());
project.setTotalcost(m.getTotalcost());
project.setKeywords(mapStringField(m.getKeywords()));
project.setSubjects(m.getSubjectsList().stream()
project.setSubjects(
m.getSubjectsList().stream()
.map(sp -> mapStructuredProperty(sp))
.collect(Collectors.toList()));
project.setTitle(mapStringField(m.getTitle()));
project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
project.setFundingtree(m.getFundingtreeList().stream()
project.setFundingtree(
m.getFundingtreeList().stream()
.map(f -> mapStringField(f))
.collect(Collectors.toList()));
project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
@ -242,12 +246,12 @@ public class ProtoConverter implements Serializable {
setEntity(software, oaf);
setResult(software, oaf);
software.setDocumentationUrl(m.getDocumentationUrlList()
.stream()
software.setDocumentationUrl(
m.getDocumentationUrlList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
software.setLicense(m.getLicenseList()
.stream()
software.setLicense(
m.getLicenseList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
@ -260,16 +264,16 @@ public class ProtoConverter implements Serializable {
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
setEntity(otherResearchProducts, oaf);
setResult(otherResearchProducts, oaf);
otherResearchProducts.setContactperson(m.getContactpersonList()
.stream()
otherResearchProducts.setContactperson(
m.getContactpersonList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setContactgroup(m.getContactgroupList()
.stream()
otherResearchProducts.setContactgroup(
m.getContactgroupList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
otherResearchProducts.setTool(m.getToolList()
.stream()
otherResearchProducts.setTool(
m.getToolList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
@ -298,12 +302,11 @@ public class ProtoConverter implements Serializable {
dataset.setVersion(mapStringField(m.getVersion()));
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
dataset.setGeolocation(m.getGeolocationList()
.stream()
dataset.setGeolocation(
m.getGeolocationList().stream()
.map(ProtoConverter::mapGeolocation)
.collect(Collectors.toList()));
return dataset;
}
public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
@ -313,100 +316,103 @@ public class ProtoConverter implements Serializable {
}
public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
//setting Entity fields
// setting Entity fields
final OafProtos.OafEntity e = oaf.getEntity();
entity.setId(e.getId());
entity.setOriginalId(e.getOriginalIdList());
entity.setCollectedfrom(e.getCollectedfromList()
.stream()
entity.setCollectedfrom(
e.getCollectedfromList().stream()
.map(ProtoConverter::mapKV)
.collect(Collectors.toList()));
entity.setPid(e.getPidList().stream()
entity.setPid(
e.getPidList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setDateofcollection(e.getDateofcollection());
entity.setDateoftransformation(e.getDateoftransformation());
entity.setExtraInfo(e.getExtraInfoList()
.stream()
entity.setExtraInfo(
e.getExtraInfoList().stream()
.map(ProtoConverter::mapExtraInfo)
.collect(Collectors.toList()));
return entity;
}
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
//setting Entity fields
// setting Entity fields
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
entity.setAuthor(m.getAuthorList()
.stream()
entity.setAuthor(
m.getAuthorList().stream()
.map(ProtoConverter::mapAuthor)
.collect(Collectors.toList()));
entity.setResulttype(mapQualifier(m.getResulttype()));
entity.setLanguage(mapQualifier(m.getLanguage()));
entity.setCountry(m.getCountryList()
.stream()
entity.setCountry(
m.getCountryList().stream()
.map(ProtoConverter::mapQualifierAsCountry)
.collect(Collectors.toList()));
entity.setSubject(m.getSubjectList()
.stream()
entity.setSubject(
m.getSubjectList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setTitle(m.getTitleList()
.stream()
entity.setTitle(
m.getTitleList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setRelevantdate(m.getRelevantdateList()
.stream()
entity.setRelevantdate(
m.getRelevantdateList().stream()
.map(ProtoConverter::mapStructuredProperty)
.collect(Collectors.toList()));
entity.setDescription(m.getDescriptionList()
.stream()
entity.setDescription(
m.getDescriptionList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
entity.setPublisher(mapStringField(m.getPublisher()));
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
entity.setSource(m.getSourceList()
.stream()
entity.setSource(
m.getSourceList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setFulltext(m.getFulltextList()
.stream()
entity.setFulltext(
m.getFulltextList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setFormat(m.getFormatList()
.stream()
entity.setFormat(
m.getFormatList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setContributor(m.getContributorList()
.stream()
entity.setContributor(
m.getContributorList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setResourcetype(mapQualifier(m.getResourcetype()));
entity.setCoverage(m.getCoverageList()
.stream()
entity.setCoverage(
m.getCoverageList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
entity.setContext(m.getContextList()
.stream()
entity.setContext(
m.getContextList().stream()
.map(ProtoConverter::mapContext)
.collect(Collectors.toList()));
entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
entity.setBestaccessright(
getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
return entity;
}
private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
if (instanceList != null) {
final Optional<FieldTypeProtos.Qualifier> min = instanceList.stream()
.map(i -> i.getAccessright()).min(new LicenseComparator());
final Optional<FieldTypeProtos.Qualifier> min =
instanceList.stream().map(i -> i.getAccessright()).min(new LicenseComparator());
final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN);
}
if (StringUtils.isBlank(rights.getClassname()) || UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
if (StringUtils.isBlank(rights.getClassname())
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
rights.setClassname(NOT_AVAILABLE);
}
if (StringUtils.isBlank(rights.getSchemeid())) {
@ -425,14 +431,13 @@ public class ProtoConverter implements Serializable {
final Context entity = new Context();
entity.setId(context.getId());
entity.setDataInfo(context.getDataInfoList()
.stream()
entity.setDataInfo(
context.getDataInfoList().stream()
.map(ProtoConverter::mapDataInfo)
.collect(Collectors.toList()));
return entity;
}
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
final KeyValue keyValue = new KeyValue();
keyValue.setKey(kv.getKey());
@ -495,7 +500,8 @@ public class ProtoConverter implements Serializable {
return entity;
}
public static OriginDescription mapOriginalDescription(FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
public static OriginDescription mapOriginalDescription(
FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
final OriginDescription originDescriptionResult = new OriginDescription();
originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
originDescriptionResult.setAltered(originDescription.getAltered());
@ -550,9 +556,10 @@ public class ProtoConverter implements Serializable {
entity.setName(author.getName());
entity.setSurname(author.getSurname());
entity.setRank(author.getRank());
entity.setPid(author.getPidList()
.stream()
.map(kv -> {
entity.setPid(
author.getPidList().stream()
.map(
kv -> {
final StructuredProperty sp = new StructuredProperty();
sp.setValue(kv.getValue());
final Qualifier q = new Qualifier();
@ -562,12 +569,11 @@ public class ProtoConverter implements Serializable {
return sp;
})
.collect(Collectors.toList()));
entity.setAffiliation(author.getAffiliationList()
.stream()
entity.setAffiliation(
author.getAffiliationList().stream()
.map(ProtoConverter::mapStringField)
.collect(Collectors.toList()));
return entity;
}
public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {

View File

@ -5,14 +5,17 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import java.io.IOException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.Objects;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
@ -21,16 +24,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import scala.Tuple2;
import java.io.IOException;
import java.io.Serializable;
import java.util.LinkedList;
import java.util.Objects;
public class TransformActions implements Serializable {
@ -38,8 +34,10 @@ public class TransformActions implements Serializable {
private static final String SEPARATOR = "/";
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils.toString(MigrateActionSet.class.getResourceAsStream(
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
MigrateActionSet.class.getResourceAsStream(
"/eu/dnetlib/dhp/migration/transform_actionsets_parameters.json")));
parser.parseArgument(args);
@ -60,18 +58,25 @@ public class TransformActions implements Serializable {
final String targetBaseDir = getTargetBaseDir(isLookupUrl);
try(SparkSession spark = getSparkSession(parser)) {
try (SparkSession spark = getSparkSession(parser)) {
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
for(String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
LinkedList<String> pathQ =
Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
final String rawset = pathQ.pollLast();
final String actionSetDirectory = pathQ.pollLast();
final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);
final Path targetDirectory =
new Path(
targetBaseDir
+ SEPARATOR
+ actionSetDirectory
+ SEPARATOR
+ rawset);
if (fs.exists(targetDirectory)) {
log.info(String.format("found target directory '%s", targetDirectory));
@ -79,10 +84,16 @@ public class TransformActions implements Serializable {
log.info(String.format("deleted target directory '%s", targetDirectory));
}
log.info(String.format("transforming actions from '%s' to '%s'", sourcePath, targetDirectory));
log.info(
String.format(
"transforming actions from '%s' to '%s'",
sourcePath, targetDirectory));
sc.sequenceFile(sourcePath, Text.class, Text.class)
.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString()))
.map(
a ->
eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(
a._2().toString()))
.map(a -> doTransform(a))
.filter(Objects::isNull)
.filter(a -> a.getPayload() == null)
@ -92,7 +103,8 @@ public class TransformActions implements Serializable {
}
}
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException {
private Text transformAction(eu.dnetlib.actionmanager.actions.AtomicAction aa)
throws InvalidProtocolBufferException, JsonProcessingException {
final Text out = new Text();
final ObjectMapper mapper = new ObjectMapper();
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
@ -135,7 +147,8 @@ public class TransformActions implements Serializable {
return new AtomicAction<>(Relation.class, rel);
}
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa) throws InvalidProtocolBufferException {
private AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
throws InvalidProtocolBufferException {
final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
final Oaf oaf = ProtoConverter.convert(proto_oaf);
switch (proto_oaf.getKind()) {
@ -148,14 +161,21 @@ public class TransformActions implements Serializable {
case project:
return new AtomicAction<>(Project.class, (Project) oaf);
case result:
final String resulttypeid = proto_oaf.getEntity().getResult().getMetadata().getResulttype().getClassid();
final String resulttypeid =
proto_oaf
.getEntity()
.getResult()
.getMetadata()
.getResulttype()
.getClassid();
switch (resulttypeid) {
case "publication":
return new AtomicAction<>(Publication.class, (Publication) oaf);
case "software":
return new AtomicAction<>(Software.class, (Software) oaf);
case "other":
return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf);
return new AtomicAction<>(
OtherResearchProduct.class, (OtherResearchProduct) oaf);
case "dataset":
return new AtomicAction<>(Dataset.class, (Dataset) oaf);
default:
@ -163,7 +183,8 @@ public class TransformActions implements Serializable {
return new AtomicAction<>(Result.class, (Result) oaf);
}
default:
throw new IllegalArgumentException("invalid entity type: " + proto_oaf.getEntity().getType());
throw new IllegalArgumentException(
"invalid entity type: " + proto_oaf.getEntity().getType());
}
case relation:
return new AtomicAction<>(Relation.class, (Relation) oaf);
@ -174,15 +195,15 @@ public class TransformActions implements Serializable {
private String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
String XQUERY =
"collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
return isLookUp.getResourceProfileByQuery(XQUERY);
}
private static SparkSession getSparkSession(ArgumentApplicationParser parser) {
SparkConf conf = new SparkConf();
return SparkSession
.builder()
return SparkSession.builder()
.appName(TransformActions.class.getSimpleName())
.master(parser.get("master"))
.config(conf)

View File

@ -3,30 +3,33 @@ package eu.dnetlib.dhp.transformation;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.functions.Cleaner;
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.util.Map;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.s9api.*;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.util.LongAccumulator;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.util.Map;
public class TransformFunction implements MapFunction<MetadataRecord, MetadataRecord> {
private final LongAccumulator totalItems;
private final LongAccumulator errorItems;
private final LongAccumulator transformedItems;
private final String transformationRule;
private final Cleaner cleanFunction;
private final long dateOfTransformation;
public TransformFunction(LongAccumulator totalItems, LongAccumulator errorItems, LongAccumulator transformedItems, final String transformationRule, long dateOfTransformation, final Map<String, Vocabulary> vocabularies) throws Exception {
this.totalItems= totalItems;
public TransformFunction(
LongAccumulator totalItems,
LongAccumulator errorItems,
LongAccumulator transformedItems,
final String transformationRule,
long dateOfTransformation,
final Map<String, Vocabulary> vocabularies)
throws Exception {
this.totalItems = totalItems;
this.errorItems = errorItems;
this.transformedItems = transformedItems;
this.transformationRule = transformationRule;
@ -41,13 +44,21 @@ public class TransformFunction implements MapFunction<MetadataRecord, MetadataRe
Processor processor = new Processor(false);
processor.registerExtensionFunction(cleanFunction);
final XsltCompiler comp = processor.newXsltCompiler();
XsltExecutable xslt = comp.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
XdmNode source = processor.newDocumentBuilder().build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
XsltExecutable xslt =
comp.compile(
new StreamSource(
new ByteArrayInputStream(transformationRule.getBytes())));
XdmNode source =
processor
.newDocumentBuilder()
.build(
new StreamSource(
new ByteArrayInputStream(value.getBody().getBytes())));
XsltTransformer trans = xslt.load();
trans.setInitialContextNode(source);
final StringWriter output = new StringWriter();
Serializer out = processor.newSerializer(output);
out.setOutputProperty(Serializer.Property.METHOD,"xml");
out.setOutputProperty(Serializer.Property.METHOD, "xml");
out.setOutputProperty(Serializer.Property.INDENT, "yes");
trans.setDestination(out);
trans.transform();
@ -56,12 +67,9 @@ public class TransformFunction implements MapFunction<MetadataRecord, MetadataRe
value.setDateOfTransformation(dateOfTransformation);
transformedItems.add(1);
return value;
}catch (Throwable e) {
} catch (Throwable e) {
errorItems.add(1);
return null;
}
}
}

View File

@ -1,7 +1,6 @@
package eu.dnetlib.dhp.transformation;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.GenerateNativeStoreSparkJob;
import eu.dnetlib.dhp.model.mdstore.MetadataRecord;
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
import eu.dnetlib.dhp.transformation.vocabulary.VocabularyHelper;
@ -9,6 +8,10 @@ import eu.dnetlib.dhp.utils.DHPUtils;
import eu.dnetlib.message.Message;
import eu.dnetlib.message.MessageManager;
import eu.dnetlib.message.MessageType;
import java.io.ByteArrayInputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.cli.*;
import org.apache.commons.io.IOUtils;
import org.apache.spark.sql.Dataset;
@ -21,69 +24,79 @@ import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import java.io.ByteArrayInputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
public class TransformSparkJobNode {
public static void main(String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(TransformSparkJobNode.class.getResourceAsStream("/eu/dnetlib/dhp/transformation/transformation_input_parameters.json")));
final ArgumentApplicationParser parser =
new ArgumentApplicationParser(
IOUtils.toString(
TransformSparkJobNode.class.getResourceAsStream(
"/eu/dnetlib/dhp/transformation/transformation_input_parameters.json")));
parser.parseArgument(args);
final String inputPath = parser.get("input");
final String outputPath = parser.get("output");
final String workflowId = parser.get("workflowId");
final String trasformationRule = extractXSLTFromTR(Objects.requireNonNull(DHPUtils.decompressString(parser.get("transformationRule"))));
final String trasformationRule =
extractXSLTFromTR(
Objects.requireNonNull(
DHPUtils.decompressString(parser.get("transformationRule"))));
final String master = parser.get("master");
final String rabbitUser = parser.get("rabbitUser");
final String rabbitPassword = parser.get("rabbitPassword");
final String rabbitHost = parser.get("rabbitHost");
final String rabbitReportQueue = parser.get("rabbitReportQueue");
final long dateOfCollection = new Long(parser.get("dateOfCollection"));
final boolean test = parser.get("isTest") == null?false: Boolean.valueOf(parser.get("isTest"));
final boolean test =
parser.get("isTest") == null ? false : Boolean.valueOf(parser.get("isTest"));
final SparkSession spark = SparkSession
.builder()
final SparkSession spark =
SparkSession.builder()
.appName("TransformStoreSparkJob")
.master(master)
.getOrCreate();
final Encoder<MetadataRecord> encoder = Encoders.bean(MetadataRecord.class);
final Dataset<MetadataRecord> mdstoreInput = spark.read().format("parquet").load(inputPath).as(encoder);
final Dataset<MetadataRecord> mdstoreInput =
spark.read().format("parquet").load(inputPath).as(encoder);
final LongAccumulator totalItems = spark.sparkContext().longAccumulator("TotalItems");
final LongAccumulator errorItems = spark.sparkContext().longAccumulator("errorItems");
final LongAccumulator transformedItems = spark.sparkContext().longAccumulator("transformedItems");
final LongAccumulator transformedItems =
spark.sparkContext().longAccumulator("transformedItems");
final Map<String, Vocabulary> vocabularies = new HashMap<>();
vocabularies.put("dnet:languages", VocabularyHelper.getVocabularyFromAPI("dnet:languages"));
final TransformFunction transformFunction = new TransformFunction(totalItems, errorItems, transformedItems, trasformationRule, dateOfCollection, vocabularies) ;
final TransformFunction transformFunction =
new TransformFunction(
totalItems,
errorItems,
transformedItems,
trasformationRule,
dateOfCollection,
vocabularies);
mdstoreInput.map(transformFunction, encoder).write().format("parquet").save(outputPath);
if (rabbitHost != null) {
System.out.println("SEND FINAL REPORT");
final Map<String, String> reportMap = new HashMap<>();
reportMap.put("inputItem" , ""+ totalItems.value());
reportMap.put("inputItem", "" + totalItems.value());
reportMap.put("invalidRecords", "" + errorItems.value());
reportMap.put("mdStoreSize", "" + transformedItems.value());
System.out.println(new Message(workflowId, "Transform", MessageType.REPORT, reportMap));
if (!test) {
final MessageManager manager = new MessageManager(rabbitHost, rabbitUser, rabbitPassword, false, false, null);
manager.sendMessage(new Message(workflowId, "Transform", MessageType.REPORT, reportMap), rabbitReportQueue, true, false);
final MessageManager manager =
new MessageManager(
rabbitHost, rabbitUser, rabbitPassword, false, false, null);
manager.sendMessage(
new Message(workflowId, "Transform", MessageType.REPORT, reportMap),
rabbitReportQueue,
true,
false);
manager.close();
}
}
}
private static String extractXSLTFromTR(final String tr) throws DocumentException {
SAXReader reader = new SAXReader();
Document document = reader.read(new ByteArrayInputStream(tr.getBytes()));

View File

@ -2,18 +2,15 @@ package eu.dnetlib.dhp.transformation.functions;
import eu.dnetlib.dhp.transformation.vocabulary.Term;
import eu.dnetlib.dhp.transformation.vocabulary.Vocabulary;
import java.util.Map;
import java.util.Optional;
import net.sf.saxon.s9api.*;
import scala.Serializable;
import java.util.Map;
import java.util.Optional;
public class Cleaner implements ExtensionFunction, Serializable {
private final Map<String, Vocabulary> vocabularies;
public Cleaner(Map<String, Vocabulary> vocabularies) {
this.vocabularies = vocabularies;
}
@ -30,20 +27,22 @@ public class Cleaner implements ExtensionFunction, Serializable {
@Override
public SequenceType[] getArgumentTypes() {
return new SequenceType[]
{
return new SequenceType[] {
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE),
SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
};
}
@Override
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
final String currentValue = xdmValues[0].itemAt(0).getStringValue();
final String vocabularyName =xdmValues[1].itemAt(0).getStringValue();
Optional<Term> cleanedValue = vocabularies.get(vocabularyName).getTerms().stream().filter(it -> it.getNativeName().equalsIgnoreCase(currentValue)).findAny();
final String vocabularyName = xdmValues[1].itemAt(0).getStringValue();
Optional<Term> cleanedValue =
vocabularies.get(vocabularyName).getTerms().stream()
.filter(it -> it.getNativeName().equalsIgnoreCase(currentValue))
.findAny();
return new XdmAtomicValue(cleanedValue.isPresent()?cleanedValue.get().getCode():currentValue);
return new XdmAtomicValue(
cleanedValue.isPresent() ? cleanedValue.get().getCode() : currentValue);
}
}

Some files were not shown because too many files have changed in this diff Show More