forked from D-Net/dnet-hadoop
Merge remote-tracking branch 'upstream/master' into usage-stats-export-wf-v2
This commit is contained in:
commit
195111119e
|
@ -7,6 +7,8 @@
|
||||||
*.iws
|
*.iws
|
||||||
*~
|
*~
|
||||||
.vscode
|
.vscode
|
||||||
|
.metals
|
||||||
|
.bloop
|
||||||
.classpath
|
.classpath
|
||||||
/*/.classpath
|
/*/.classpath
|
||||||
/*/*/.classpath
|
/*/*/.classpath
|
||||||
|
@ -23,4 +25,10 @@
|
||||||
/build
|
/build
|
||||||
spark-warehouse
|
spark-warehouse
|
||||||
/**/job-override.properties
|
/**/job-override.properties
|
||||||
|
<<<<<<< HEAD
|
||||||
/**/*.log
|
/**/*.log
|
||||||
|
=======
|
||||||
|
/**/*.log
|
||||||
|
/**/.factorypath
|
||||||
|
|
||||||
|
>>>>>>> upstream/master
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
# dnet-hadoop
|
# dnet-hadoop
|
||||||
Dnet-hadoop is a tool for
|
Dnet-hadoop is the project that defined all the OOZIE workflows for the OpenAIRE Graph construction, processing, provisioning.
|
|
@ -8,8 +8,6 @@ import java.util.List;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.maven.plugin.AbstractMojo;
|
import org.apache.maven.plugin.AbstractMojo;
|
||||||
import org.apache.maven.plugin.MojoExecutionException;
|
|
||||||
import org.apache.maven.plugin.MojoFailureException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates oozie properties which were not provided from commandline.
|
* Generates oozie properties which were not provided from commandline.
|
||||||
|
@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
};
|
};
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void execute() throws MojoExecutionException, MojoFailureException {
|
public void execute() {
|
||||||
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
||||||
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
||||||
String generatedSandboxName = generateSandboxName(
|
String generatedSandboxName = generateSandboxName(
|
||||||
|
@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
/**
|
/**
|
||||||
* Generates sandbox name from workflow source directory.
|
* Generates sandbox name from workflow source directory.
|
||||||
*
|
*
|
||||||
* @param wfSourceDir
|
* @param wfSourceDir workflow source directory
|
||||||
* @return generated sandbox name
|
* @return generated sandbox name
|
||||||
*/
|
*/
|
||||||
private String generateSandboxName(String wfSourceDir) {
|
private String generateSandboxName(String wfSourceDir) {
|
||||||
// utilize all dir names until finding one of the limiters
|
// utilize all dir names until finding one of the limiters
|
||||||
List<String> sandboxNameParts = new ArrayList<String>();
|
List<String> sandboxNameParts = new ArrayList<>();
|
||||||
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
|
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
|
||||||
ArrayUtils.reverse(tokens);
|
ArrayUtils.reverse(tokens);
|
||||||
if (tokens.length > 0) {
|
if (tokens.length > 0) {
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
for (String limiter : limiters) {
|
for (String limiter : limiters) {
|
||||||
if (limiter.equals(token)) {
|
if (limiter.equals(token)) {
|
||||||
return sandboxNameParts.size() > 0
|
return !sandboxNameParts.isEmpty()
|
||||||
? StringUtils.join(sandboxNameParts.toArray())
|
? StringUtils.join(sandboxNameParts.toArray())
|
||||||
: null;
|
: null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sandboxNameParts.size() > 0) {
|
if (!sandboxNameParts.isEmpty()) {
|
||||||
sandboxNameParts.add(0, File.separator);
|
sandboxNameParts.add(0, File.separator);
|
||||||
}
|
}
|
||||||
sandboxNameParts.add(0, token);
|
sandboxNameParts.add(0, token);
|
||||||
|
|
|
@ -16,6 +16,7 @@ import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
*/
|
*/
|
||||||
protected List<String> getEscapeChars(String escapeChars) {
|
protected List<String> getEscapeChars(String escapeChars) {
|
||||||
List<String> tokens = getListFromCSV(escapeChars);
|
List<String> tokens = getListFromCSV(escapeChars);
|
||||||
List<String> realTokens = new ArrayList<String>();
|
List<String> realTokens = new ArrayList<>();
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
String realToken = getRealToken(token);
|
String realToken = getRealToken(token);
|
||||||
realTokens.add(realToken);
|
realTokens.add(realToken);
|
||||||
|
@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
* @return content
|
* @return content
|
||||||
*/
|
*/
|
||||||
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
|
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
|
||||||
List<String> names = new ArrayList<String>(properties.stringPropertyNames());
|
List<String> names = new ArrayList<>(properties.stringPropertyNames());
|
||||||
Collections.sort(names);
|
Collections.sort(names);
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
if (!StringUtils.isBlank(comment)) {
|
if (!StringUtils.isBlank(comment)) {
|
||||||
|
@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
throws MojoExecutionException {
|
throws MojoExecutionException {
|
||||||
try {
|
try {
|
||||||
String content = getContent(comment, properties, escapeTokens);
|
String content = getContent(comment, properties, escapeTokens);
|
||||||
FileUtils.writeStringToFile(file, content, ENCODING_UTF8);
|
FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new MojoExecutionException("Error creating properties file", e);
|
throw new MojoExecutionException("Error creating properties file", e);
|
||||||
}
|
}
|
||||||
|
@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
*/
|
*/
|
||||||
protected static final List<String> getListFromCSV(String csv) {
|
protected static final List<String> getListFromCSV(String csv) {
|
||||||
if (StringUtils.isBlank(csv)) {
|
if (StringUtils.isBlank(csv)) {
|
||||||
return new ArrayList<String>();
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
List<String> list = new ArrayList<String>();
|
List<String> list = new ArrayList<>();
|
||||||
String[] tokens = StringUtils.split(csv, ",");
|
String[] tokens = StringUtils.split(csv, ",");
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
list.add(token.trim());
|
list.add(token.trim());
|
||||||
|
|
|
@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
/** @author mhorst, claudio.atzori */
|
/** @author mhorst, claudio.atzori */
|
||||||
public class GenerateOoziePropertiesMojoTest {
|
class GenerateOoziePropertiesMojoTest {
|
||||||
|
|
||||||
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void clearSystemProperties() {
|
void clearSystemProperties() {
|
||||||
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
|
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
|
||||||
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
|
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmpty() throws Exception {
|
void testExecuteEmpty() throws Exception {
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteSandboxNameAlreadySet() throws Exception {
|
void testExecuteSandboxNameAlreadySet() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
||||||
String sandboxName = "originalSandboxName";
|
String sandboxName = "originalSandboxName";
|
||||||
|
@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmptyWorkflowSourceDir() throws Exception {
|
void testExecuteEmptyWorkflowSourceDir() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "";
|
String workflowSourceDir = "";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteNullSandboxNameGenerated() throws Exception {
|
void testExecuteNullSandboxNameGenerated() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/";
|
String workflowSourceDir = "eu/dnetlib/dhp/";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecute() throws Exception {
|
void testExecute() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithoutRoot() throws Exception {
|
void testExecuteWithoutRoot() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "wf/transformers";
|
String workflowSourceDir = "wf/transformers";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
|
|
@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
/** @author mhorst, claudio.atzori */
|
/** @author mhorst, claudio.atzori */
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class WritePredefinedProjectPropertiesTest {
|
class WritePredefinedProjectPropertiesTest {
|
||||||
|
|
||||||
@Mock
|
@Mock
|
||||||
private MavenProject mavenProject;
|
private MavenProject mavenProject;
|
||||||
|
@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
// ----------------------------------- TESTS ---------------------------------------------
|
// ----------------------------------- TESTS ---------------------------------------------
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmpty() throws Exception {
|
void testExecuteEmpty() throws Exception {
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectProperties() throws Exception {
|
void testExecuteWithProjectProperties() throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test()
|
@Test()
|
||||||
public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
|
void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromBlankLocation() {
|
void testExecuteIncludingPropertyKeysFromBlankLocation() {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
|
void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
mojo.setQuiet(true);
|
mojo.setQuiet(true);
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
||||||
|
@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidFile() {
|
void testExecuteIncludingPropertyKeysFromInvalidFile() {
|
||||||
// given
|
// given
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
||||||
"invalid location"
|
"invalid location"
|
||||||
|
@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
|
void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
mojo.setIncludeEnvironmentVariables(true);
|
mojo.setIncludeEnvironmentVariables(true);
|
||||||
|
|
||||||
|
@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
|
void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey";
|
String key = "systemPropertyKey";
|
||||||
String value = "systemPropertyValue";
|
String value = "systemPropertyValue";
|
||||||
|
@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey ";
|
String key = "systemPropertyKey ";
|
||||||
|
|
|
@ -22,9 +22,20 @@
|
||||||
<id>dnet45-releases</id>
|
<id>dnet45-releases</id>
|
||||||
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
|
<url>https://maven.d4science.org/nexus/content/repositories/dnet45-releases</url>
|
||||||
</repository>
|
</repository>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-build/dhp-code-style</url>
|
||||||
|
</site>
|
||||||
</distributionManagement>
|
</distributionManagement>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
<extensions>
|
||||||
|
<extension>
|
||||||
|
<groupId>org.apache.maven.wagon</groupId>
|
||||||
|
<artifactId>wagon-ssh</artifactId>
|
||||||
|
<version>2.10</version>
|
||||||
|
</extension>
|
||||||
|
</extensions>
|
||||||
<pluginManagement>
|
<pluginManagement>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
|
@ -35,7 +46,7 @@
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-site-plugin</artifactId>
|
<artifactId>maven-site-plugin</artifactId>
|
||||||
<version>3.7.1</version>
|
<version>3.9.1</version>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</pluginManagement>
|
</pluginManagement>
|
||||||
|
@ -43,6 +54,7 @@
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
<dhp.site.stage.path>sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop</dhp.site.stage.path>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
</project>
|
</project>
|
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||||
|
name="DHP-Aggregation">
|
||||||
|
<skin>
|
||||||
|
<groupId>org.apache.maven.skins</groupId>
|
||||||
|
<artifactId>maven-fluido-skin</artifactId>
|
||||||
|
<version>1.8</version>
|
||||||
|
</skin>
|
||||||
|
<poweredBy>
|
||||||
|
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||||
|
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||||
|
</poweredBy>
|
||||||
|
<body>
|
||||||
|
<links>
|
||||||
|
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||||
|
</links>
|
||||||
|
<menu ref="modules" />
|
||||||
|
<menu ref="reports"/>
|
||||||
|
</body>
|
||||||
|
</project>
|
|
@ -10,6 +10,9 @@
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<description>This module is a container for the build tools used in dnet-hadoop</description>
|
<description>This module is a container for the build tools used in dnet-hadoop</description>
|
||||||
|
<properties>
|
||||||
|
<maven.javadoc.skip>true</maven.javadoc.skip>
|
||||||
|
</properties>
|
||||||
|
|
||||||
<modules>
|
<modules>
|
||||||
<module>dhp-code-style</module>
|
<module>dhp-code-style</module>
|
||||||
|
@ -17,4 +20,12 @@
|
||||||
<module>dhp-build-properties-maven-plugin</module>
|
<module>dhp-build-properties-maven-plugin</module>
|
||||||
</modules>
|
</modules>
|
||||||
|
|
||||||
|
|
||||||
|
<distributionManagement>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-build/</url>
|
||||||
|
</site>
|
||||||
|
</distributionManagement>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||||
|
<project xmlns="http://maven.apache.org/DECORATION/1.8.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/DECORATION/1.8.0 https://maven.apache.org/xsd/decoration-1.8.0.xsd"
|
||||||
|
name="DHP-Aggregation">
|
||||||
|
<skin>
|
||||||
|
<groupId>org.apache.maven.skins</groupId>
|
||||||
|
<artifactId>maven-fluido-skin</artifactId>
|
||||||
|
<version>1.8</version>
|
||||||
|
</skin>
|
||||||
|
<poweredBy>
|
||||||
|
<logo name="OpenAIRE Research Graph" href="https://graph.openaire.eu/"
|
||||||
|
img="https://graph.openaire.eu/assets/common-assets/logo-large-graph.png"/>
|
||||||
|
</poweredBy>
|
||||||
|
<body>
|
||||||
|
<links>
|
||||||
|
<item name="Code" href="https://code-repo.d4science.org/" />
|
||||||
|
</links>
|
||||||
|
|
||||||
|
<menu ref="modules" />
|
||||||
|
<menu ref="reports"/>
|
||||||
|
</body>
|
||||||
|
</project>
|
|
@ -7,12 +7,57 @@
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.2.4-SNAPSHOT</version>
|
<version>1.2.4-SNAPSHOT</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
|
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>dhp-common</artifactId>
|
<artifactId>dhp-common</artifactId>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<distributionManagement>
|
||||||
|
<site>
|
||||||
|
<id>DHPSite</id>
|
||||||
|
<url>${dhp.site.stage.path}/dhp-common</url>
|
||||||
|
</site>
|
||||||
|
</distributionManagement>
|
||||||
|
|
||||||
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
|
<description>This module contains common utilities meant to be used across the dnet-hadoop submodules</description>
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<version>${net.alchim31.maven.version}</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>scala-compile-first</id>
|
||||||
|
<phase>initialize</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
<goal>compile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-test-compile</id>
|
||||||
|
<phase>process-test-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>testCompile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
<execution>
|
||||||
|
<id>scala-doc</id>
|
||||||
|
<phase>process-resources</phase> <!-- or wherever -->
|
||||||
|
<goals>
|
||||||
|
<goal>doc</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<scalaVersion>${scala.version}</scalaVersion>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
|
||||||
|
</build>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
|
@ -20,6 +65,15 @@
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.github.sisyphsu</groupId>
|
||||||
|
<artifactId>dateparser</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>me.xuender</groupId>
|
||||||
|
<artifactId>unidecode</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
@ -53,11 +107,6 @@
|
||||||
<groupId>com.fasterxml.jackson.core</groupId>
|
<groupId>com.fasterxml.jackson.core</groupId>
|
||||||
<artifactId>jackson-databind</artifactId>
|
<artifactId>jackson-databind</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<!-- https://mvnrepository.com/artifact/com.rabbitmq/amqp-client -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>com.rabbitmq</groupId>
|
|
||||||
<artifactId>amqp-client</artifactId>
|
|
||||||
</dependency>
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.sf.saxon</groupId>
|
<groupId>net.sf.saxon</groupId>
|
||||||
<artifactId>Saxon-HE</artifactId>
|
<artifactId>Saxon-HE</artifactId>
|
||||||
|
@ -98,10 +147,25 @@
|
||||||
<artifactId>dnet-pace-core</artifactId>
|
<artifactId>dnet-pace-core</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mongodb</groupId>
|
||||||
|
<artifactId>mongo-java-driver</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-schemas</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.opencsv</groupId>
|
||||||
|
<artifactId>opencsv</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.application;
|
package eu.dnetlib.dhp.application;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.*;
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.zip.GZIPInputStream;
|
||||||
import java.util.zip.GZIPOutputStream;
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
@ -12,17 +9,21 @@ import java.util.zip.GZIPOutputStream;
|
||||||
import org.apache.commons.cli.*;
|
import org.apache.commons.cli.*;
|
||||||
import org.apache.commons.codec.binary.Base64;
|
import org.apache.commons.codec.binary.Base64;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
public class ArgumentApplicationParser implements Serializable {
|
public class ArgumentApplicationParser implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(ArgumentApplicationParser.class);
|
||||||
|
|
||||||
private final Options options = new Options();
|
private final Options options = new Options();
|
||||||
private final Map<String, String> objectMap = new HashMap<>();
|
private final Map<String, String> objectMap = new HashMap<>();
|
||||||
|
|
||||||
private final List<String> compressedValues = new ArrayList<>();
|
private final List<String> compressedValues = new ArrayList<>();
|
||||||
|
|
||||||
public ArgumentApplicationParser(final String json_configuration) throws Exception {
|
public ArgumentApplicationParser(final String json_configuration) throws IOException {
|
||||||
final ObjectMapper mapper = new ObjectMapper();
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
|
final OptionsParameter[] configuration = mapper.readValue(json_configuration, OptionsParameter[].class);
|
||||||
createOptionMap(configuration);
|
createOptionMap(configuration);
|
||||||
|
@ -33,7 +34,6 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createOptionMap(final OptionsParameter[] configuration) {
|
private void createOptionMap(final OptionsParameter[] configuration) {
|
||||||
|
|
||||||
Arrays
|
Arrays
|
||||||
.stream(configuration)
|
.stream(configuration)
|
||||||
.map(
|
.map(
|
||||||
|
@ -47,10 +47,6 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
return o;
|
return o;
|
||||||
})
|
})
|
||||||
.forEach(options::addOption);
|
.forEach(options::addOption);
|
||||||
|
|
||||||
// HelpFormatter formatter = new HelpFormatter();
|
|
||||||
// formatter.printHelp("myapp", null, options, null, true);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String decompressValue(final String abstractCompressed) {
|
public static String decompressValue(final String abstractCompressed) {
|
||||||
|
@ -60,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
final StringWriter stringWriter = new StringWriter();
|
final StringWriter stringWriter = new StringWriter();
|
||||||
IOUtils.copy(gis, stringWriter);
|
IOUtils.copy(gis, stringWriter);
|
||||||
return stringWriter.toString();
|
return stringWriter.toString();
|
||||||
} catch (Throwable e) {
|
} catch (IOException e) {
|
||||||
System.out.println("Wrong value to decompress:" + abstractCompressed);
|
log.error("Wrong value to decompress: {}", abstractCompressed);
|
||||||
throw new RuntimeException(e);
|
throw new IllegalArgumentException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressArgument(final String value) throws Exception {
|
public static String compressArgument(final String value) throws IOException {
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(out);
|
GZIPOutputStream gzip = new GZIPOutputStream(out);
|
||||||
gzip.write(value.getBytes());
|
gzip.write(value.getBytes());
|
||||||
|
@ -74,7 +70,7 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
|
return java.util.Base64.getEncoder().encodeToString(out.toByteArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parseArgument(final String[] args) throws Exception {
|
public void parseArgument(final String[] args) throws ParseException {
|
||||||
CommandLineParser parser = new BasicParser();
|
CommandLineParser parser = new BasicParser();
|
||||||
CommandLine cmd = parser.parse(options, args);
|
CommandLine cmd = parser.parse(options, args);
|
||||||
Arrays
|
Arrays
|
||||||
|
|
|
@ -9,9 +9,6 @@ public class OptionsParameter {
|
||||||
private boolean paramRequired;
|
private boolean paramRequired;
|
||||||
private boolean compressed;
|
private boolean compressed;
|
||||||
|
|
||||||
public OptionsParameter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParamName() {
|
public String getParamName() {
|
||||||
return paramName;
|
return paramName;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
package eu.dnetlib.dhp.application
|
||||||
|
|
||||||
|
import scala.io.Source
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the main Interface SparkApplication
|
||||||
|
* where all the Spark Scala class should inherit
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
trait SparkScalaApplication {
|
||||||
|
/**
|
||||||
|
* This is the path in the classpath of the json
|
||||||
|
* describes all the argument needed to run
|
||||||
|
*/
|
||||||
|
val propertyPath: String
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility to parse the arguments using the
|
||||||
|
* property json in the classpath identified from
|
||||||
|
* the variable propertyPath
|
||||||
|
*
|
||||||
|
* @param args the list of arguments
|
||||||
|
*/
|
||||||
|
def parseArguments(args: Array[String]): ArgumentApplicationParser = {
|
||||||
|
val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream(propertyPath)).mkString)
|
||||||
|
parser.parseArgument(args)
|
||||||
|
parser
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Here all the spark applications runs this method
|
||||||
|
* where the whole logic of the spark node is defined
|
||||||
|
*/
|
||||||
|
def run(): Unit
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.spark.SparkConf
|
||||||
|
import org.apache.spark.sql.SparkSession
|
||||||
|
import org.slf4j.Logger
|
||||||
|
|
||||||
|
abstract class AbstractScalaApplication (val propertyPath:String, val args:Array[String], log:Logger) extends SparkScalaApplication {
|
||||||
|
|
||||||
|
var parser: ArgumentApplicationParser = null
|
||||||
|
|
||||||
|
var spark:SparkSession = null
|
||||||
|
|
||||||
|
|
||||||
|
def initialize():SparkScalaApplication = {
|
||||||
|
parser = parseArguments(args)
|
||||||
|
spark = createSparkSession()
|
||||||
|
this
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility for creating a spark session starting from parser
|
||||||
|
*
|
||||||
|
* @return a spark Session
|
||||||
|
*/
|
||||||
|
private def createSparkSession():SparkSession = {
|
||||||
|
require(parser!= null)
|
||||||
|
|
||||||
|
val conf:SparkConf = new SparkConf()
|
||||||
|
val master = parser.get("master")
|
||||||
|
log.info(s"Creating Spark session: Master: $master")
|
||||||
|
SparkSession.builder().config(conf)
|
||||||
|
.appName(getClass.getSimpleName)
|
||||||
|
.master(master)
|
||||||
|
.getOrCreate()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.collector.worker.model;
|
package eu.dnetlib.dhp.collection;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -34,7 +34,7 @@ public class ApiDescriptor {
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setParams(final HashMap<String, String> params) {
|
public void setParams(final Map<String, String> params) {
|
||||||
this.params = params;
|
this.params = params;
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,9 @@ public class Constants {
|
||||||
|
|
||||||
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||||
|
|
||||||
|
private Constants() {
|
||||||
|
}
|
||||||
|
|
||||||
static {
|
static {
|
||||||
accessRightsCoarMap.put("OPEN", "c_abf2");
|
accessRightsCoarMap.put("OPEN", "c_abf2");
|
||||||
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
||||||
|
@ -27,4 +30,32 @@ public class Constants {
|
||||||
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
|
coarCodeLabelMap.put("c_f1cf", "EMBARGO");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final String SEQUENCE_FILE_NAME = "/sequence_file";
|
||||||
|
public static final String REPORT_FILE_NAME = "/report";
|
||||||
|
public static final String MDSTORE_DATA_PATH = "/store";
|
||||||
|
public static final String MDSTORE_SIZE_PATH = "/size";
|
||||||
|
|
||||||
|
public static final String COLLECTION_MODE = "collectionMode";
|
||||||
|
public static final String METADATA_ENCODING = "metadataEncoding";
|
||||||
|
public static final String OOZIE_WF_PATH = "oozieWfPath";
|
||||||
|
public static final String DNET_MESSAGE_MGR_URL = "dnetMessageManagerURL";
|
||||||
|
|
||||||
|
public static final String MAX_NUMBER_OF_RETRY = "maxNumberOfRetry";
|
||||||
|
public static final String REQUEST_DELAY = "requestDelay";
|
||||||
|
public static final String RETRY_DELAY = "retryDelay";
|
||||||
|
public static final String CONNECT_TIMEOUT = "connectTimeOut";
|
||||||
|
public static final String READ_TIMEOUT = "readTimeOut";
|
||||||
|
public static final String FROM_DATE_OVERRIDE = "fromDateOverride";
|
||||||
|
public static final String UNTIL_DATE_OVERRIDE = "untilDateOverride";
|
||||||
|
|
||||||
|
public static final String CONTENT_TOTALITEMS = "TotalItems";
|
||||||
|
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
||||||
|
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
||||||
|
|
||||||
|
// IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages
|
||||||
|
// see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,7 +14,7 @@ public class DbClient implements Closeable {
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(DbClient.class);
|
private static final Log log = LogFactory.getLog(DbClient.class);
|
||||||
|
|
||||||
private Connection connection;
|
private final Connection connection;
|
||||||
|
|
||||||
public DbClient(final String address, final String login, final String password) {
|
public DbClient(final String address, final String login, final String password) {
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
.setDocumentationUrl(
|
.setDocumentationUrl(
|
||||||
value
|
value
|
||||||
.stream()
|
.stream()
|
||||||
.map(v -> v.getValue())
|
.map(Field::getValue)
|
||||||
.collect(Collectors.toList())));
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
Optional
|
Optional
|
||||||
|
@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable {
|
||||||
.setContactgroup(
|
.setContactgroup(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(ir.getContactgroup())
|
.ofNullable(ir.getContactgroup())
|
||||||
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
|
.map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
|
|
||||||
out
|
out
|
||||||
.setContactperson(
|
.setContactperson(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(ir.getContactperson())
|
.ofNullable(ir.getContactperson())
|
||||||
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
|
.map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
out
|
out
|
||||||
.setTool(
|
.setTool(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(ir.getTool())
|
.ofNullable(ir.getTool())
|
||||||
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
|
.map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
|
|
||||||
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
||||||
|
@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable {
|
||||||
|
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(input.getAuthor())
|
.ofNullable(input.getAuthor())
|
||||||
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
|
.ifPresent(
|
||||||
|
ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList())));
|
||||||
|
|
||||||
// I do not map Access Right UNKNOWN or OTHER
|
// I do not map Access Right UNKNOWN or OTHER
|
||||||
|
|
||||||
|
@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
if (oInst.isPresent()) {
|
if (oInst.isPresent()) {
|
||||||
out
|
out
|
||||||
.setInstance(
|
.setInstance(
|
||||||
oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));
|
oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList()));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (iTitle.size() > 0) {
|
if (!iTitle.isEmpty()) {
|
||||||
out.setMaintitle(iTitle.get(0).getValue());
|
out.setMaintitle(iTitle.get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (iTitle.size() > 0) {
|
if (!iTitle.isEmpty()) {
|
||||||
out.setSubtitle(iTitle.get(0).getValue());
|
out.setSubtitle(iTitle.get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class HdfsSupport {
|
||||||
* @param configuration Configuration of hadoop env
|
* @param configuration Configuration of hadoop env
|
||||||
*/
|
*/
|
||||||
public static boolean exists(String path, Configuration configuration) {
|
public static boolean exists(String path, Configuration configuration) {
|
||||||
logger.info("Removing path: {}", path);
|
logger.info("Checking existence for path: {}", path);
|
||||||
return rethrowAsRuntimeException(
|
return rethrowAsRuntimeException(
|
||||||
() -> {
|
() -> {
|
||||||
Path f = new Path(path);
|
Path f = new Path(path);
|
||||||
|
|
|
@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable {
|
||||||
|
|
||||||
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
||||||
Path hdfsWritePath = new Path(outputPath);
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
|
||||||
|
|
||||||
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
Path hdfsWritePath = new Path(outputPath);
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
|
||||||
|
fileSystem.create(hdfsWritePath).getWrappedStream())) {
|
||||||
|
|
||||||
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
RemoteIterator<LocatedFileStatus> iterator = fileSystem
|
||||||
|
.listFiles(
|
||||||
|
new Path(inputPath), true);
|
||||||
|
|
||||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
while (iterator.hasNext()) {
|
||||||
.listFiles(
|
writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
|
||||||
new Path(inputPath), true);
|
}
|
||||||
|
|
||||||
while (fileStatusListIterator.hasNext()) {
|
|
||||||
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ar.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
||||||
|
@ -90,6 +85,13 @@ public class MakeTarArchive implements Serializable {
|
||||||
String p_string = p.toString();
|
String p_string = p.toString();
|
||||||
if (!p_string.endsWith("_SUCCESS")) {
|
if (!p_string.endsWith("_SUCCESS")) {
|
||||||
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
String name = p_string.substring(p_string.lastIndexOf("/") + 1);
|
||||||
|
if (name.startsWith("part-") & name.length() > 10) {
|
||||||
|
String tmp = name.substring(0, 10);
|
||||||
|
if (name.contains(".")) {
|
||||||
|
tmp += name.substring(name.indexOf("."));
|
||||||
|
}
|
||||||
|
name = tmp;
|
||||||
|
}
|
||||||
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
|
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
|
||||||
entry.setSize(fileStatus.getLen());
|
entry.setSize(fileStatus.getLen());
|
||||||
current_size += fileStatus.getLen();
|
current_size += fileStatus.getLen();
|
||||||
|
@ -100,7 +102,7 @@ public class MakeTarArchive implements Serializable {
|
||||||
BufferedInputStream bis = new BufferedInputStream(is);
|
BufferedInputStream bis = new BufferedInputStream(is);
|
||||||
|
|
||||||
int count;
|
int count;
|
||||||
byte data[] = new byte[1024];
|
byte[] data = new byte[1024];
|
||||||
while ((count = bis.read(data, 0, data.length)) != -1) {
|
while ((count = bis.read(data, 0, data.length)) != -1) {
|
||||||
ar.write(data, 0, count);
|
ar.write(data, 0, count);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,39 +1,59 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.common;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.bson.Document;
|
import org.bson.Document;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
|
import com.mongodb.BasicDBObject;
|
||||||
import com.mongodb.MongoClient;
|
import com.mongodb.MongoClient;
|
||||||
import com.mongodb.MongoClientURI;
|
import com.mongodb.MongoClientURI;
|
||||||
|
import com.mongodb.QueryBuilder;
|
||||||
|
import com.mongodb.client.FindIterable;
|
||||||
import com.mongodb.client.MongoCollection;
|
import com.mongodb.client.MongoCollection;
|
||||||
import com.mongodb.client.MongoDatabase;
|
import com.mongodb.client.MongoDatabase;
|
||||||
|
|
||||||
public class MdstoreClient implements Closeable {
|
public class MdstoreClient implements Closeable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
|
||||||
|
|
||||||
private final MongoClient client;
|
private final MongoClient client;
|
||||||
private final MongoDatabase db;
|
private final MongoDatabase db;
|
||||||
|
|
||||||
private static final String COLL_METADATA = "metadata";
|
private static final String COLL_METADATA = "metadata";
|
||||||
private static final String COLL_METADATA_MANAGER = "metadataManager";
|
private static final String COLL_METADATA_MANAGER = "metadataManager";
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(MdstoreClient.class);
|
|
||||||
|
|
||||||
public MdstoreClient(final String baseUrl, final String dbName) {
|
public MdstoreClient(final String baseUrl, final String dbName) {
|
||||||
this.client = new MongoClient(new MongoClientURI(baseUrl));
|
this.client = new MongoClient(new MongoClientURI(baseUrl));
|
||||||
this.db = getDb(client, dbName);
|
this.db = getDb(client, dbName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public MongoCollection<Document> mdStore(final String mdId) {
|
||||||
|
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
|
||||||
|
|
||||||
|
log.info("querying current mdId: {}", query.toJson());
|
||||||
|
|
||||||
|
final String currentId = Optional
|
||||||
|
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
||||||
|
.map(FindIterable::first)
|
||||||
|
.map(d -> d.getString("currentId"))
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
||||||
|
|
||||||
|
log.info("currentId: {}", currentId);
|
||||||
|
|
||||||
|
return getColl(db, currentId, true);
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String, String> validCollections(
|
public Map<String, String> validCollections(
|
||||||
final String mdFormat, final String mdLayout, final String mdInterpretation) {
|
final String mdFormat, final String mdLayout, final String mdInterpretation) {
|
||||||
|
|
||||||
|
@ -63,7 +83,7 @@ public class MdstoreClient implements Closeable {
|
||||||
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
||||||
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
||||||
log.warn(err);
|
log.warn(err);
|
||||||
throw new RuntimeException(err);
|
throw new IllegalArgumentException(err);
|
||||||
}
|
}
|
||||||
return client.getDatabase(dbName);
|
return client.getDatabase(dbName);
|
||||||
}
|
}
|
||||||
|
@ -76,7 +96,7 @@ public class MdstoreClient implements Closeable {
|
||||||
String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
||||||
log.warn(err);
|
log.warn(err);
|
||||||
if (abortIfMissing) {
|
if (abortIfMissing) {
|
||||||
throw new RuntimeException(err);
|
throw new IllegalArgumentException(err);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
|
@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
|
||||||
*/
|
*/
|
||||||
public class PacePerson {
|
public class PacePerson {
|
||||||
|
|
||||||
private static final String UTF8 = "UTF-8";
|
|
||||||
private List<String> name = Lists.newArrayList();
|
private List<String> name = Lists.newArrayList();
|
||||||
private List<String> surname = Lists.newArrayList();
|
private List<String> surname = Lists.newArrayList();
|
||||||
private List<String> fullname = Lists.newArrayList();
|
private List<String> fullname = Lists.newArrayList();
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.aggregation;
|
||||||
|
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.message.MessageSender;
|
||||||
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
public class AggregatorReport extends LinkedHashMap<String, String> implements Closeable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
|
||||||
|
|
||||||
|
private transient MessageSender messageSender;
|
||||||
|
|
||||||
|
public AggregatorReport() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public AggregatorReport(MessageSender messageSender) {
|
||||||
|
this.messageSender = messageSender;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void ongoing(Long current, Long total) {
|
||||||
|
messageSender.sendMessage(current, total);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (Objects.nonNull(messageSender)) {
|
||||||
|
log.info("closing report: ");
|
||||||
|
this.forEach((k, v) -> log.info("{} - {}", k, v));
|
||||||
|
|
||||||
|
Map<String, String> m = new HashMap<>();
|
||||||
|
m.put(getClass().getSimpleName().toLowerCase(), DHPUtils.MAPPER.writeValueAsString(values()));
|
||||||
|
messageSender.sendReport(m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -13,9 +13,9 @@ import okio.Source;
|
||||||
|
|
||||||
public class InputStreamRequestBody extends RequestBody {
|
public class InputStreamRequestBody extends RequestBody {
|
||||||
|
|
||||||
private InputStream inputStream;
|
private final InputStream inputStream;
|
||||||
private MediaType mediaType;
|
private final MediaType mediaType;
|
||||||
private long lenght;
|
private final long lenght;
|
||||||
|
|
||||||
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
public static RequestBody create(final MediaType mediaType, final InputStream inputStream, final long len) {
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,9 @@ import java.io.*;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.http.HttpHeaders;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||||
|
@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
this.deposition_id = deposition_id;
|
this.deposition_id = deposition_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
public ZenodoAPIClient(String urlString, String access_token) {
|
||||||
|
|
||||||
this.urlString = urlString;
|
this.urlString = urlString;
|
||||||
this.access_token = access_token;
|
this.access_token = access_token;
|
||||||
|
@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString)
|
.url(urlString)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.post(body)
|
.post(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(bucket + "/" + file_name)
|
.url(bucket + "/" + file_name)
|
||||||
.addHeader("Content-Type", "application/zip") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString + "/" + deposition_id)
|
.url(urlString + "/" + deposition_id)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.put(body)
|
.put(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.post(body)
|
.post(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString)
|
.url(urlString)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.get()
|
.get()
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(url)
|
.url(url)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.get()
|
.get()
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
|
|
@ -32,13 +32,13 @@ public class Creator {
|
||||||
|
|
||||||
public static Creator newInstance(String name, String affiliation, String orcid) {
|
public static Creator newInstance(String name, String affiliation, String orcid) {
|
||||||
Creator c = new Creator();
|
Creator c = new Creator();
|
||||||
if (!(name == null)) {
|
if (name != null) {
|
||||||
c.name = name;
|
c.name = name;
|
||||||
}
|
}
|
||||||
if (!(affiliation == null)) {
|
if (affiliation != null) {
|
||||||
c.affiliation = affiliation;
|
c.affiliation = affiliation;
|
||||||
}
|
}
|
||||||
if (!(orcid == null)) {
|
if (orcid != null) {
|
||||||
c.orcid = orcid;
|
c.orcid = orcid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import net.minidev.json.annotate.JsonIgnore;
|
|
||||||
|
|
||||||
public class File implements Serializable {
|
public class File implements Serializable {
|
||||||
private String checksum;
|
private String checksum;
|
||||||
private String filename;
|
private String filename;
|
||||||
private long filesize;
|
private long filesize;
|
||||||
private String id;
|
private String id;
|
||||||
|
|
||||||
@JsonIgnore
|
|
||||||
// private Links links;
|
|
||||||
|
|
||||||
public String getChecksum() {
|
public String getChecksum() {
|
||||||
return checksum;
|
return checksum;
|
||||||
}
|
}
|
||||||
|
@ -46,13 +41,4 @@ public class File implements Serializable {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @JsonIgnore
|
|
||||||
// public Links getLinks() {
|
|
||||||
// return links;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @JsonIgnore
|
|
||||||
// public void setLinks(Links links) {
|
|
||||||
// this.links = links;
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection.worker;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
public class DnetCollectorException extends Exception {
|
public class CollectorException extends Exception {
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
private static final long serialVersionUID = -290723075076039757L;
|
private static final long serialVersionUID = -290723075076039757L;
|
||||||
|
|
||||||
public DnetCollectorException() {
|
public CollectorException() {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
public DnetCollectorException(
|
public CollectorException(
|
||||||
final String message,
|
final String message,
|
||||||
final Throwable cause,
|
final Throwable cause,
|
||||||
final boolean enableSuppression,
|
final boolean enableSuppression,
|
||||||
|
@ -18,15 +18,15 @@ public class DnetCollectorException extends Exception {
|
||||||
super(message, cause, enableSuppression, writableStackTrace);
|
super(message, cause, enableSuppression, writableStackTrace);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DnetCollectorException(final String message, final Throwable cause) {
|
public CollectorException(final String message, final Throwable cause) {
|
||||||
super(message, cause);
|
super(message, cause);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DnetCollectorException(final String message) {
|
public CollectorException(final String message) {
|
||||||
super(message);
|
super(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
public DnetCollectorException(final Throwable cause) {
|
public CollectorException(final Throwable cause) {
|
||||||
super(cause);
|
super(cause);
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.opencsv.bean.CsvToBeanBuilder;
|
||||||
|
|
||||||
|
public class GetCSV {
|
||||||
|
|
||||||
|
public static final char DEFAULT_DELIMITER = ',';
|
||||||
|
|
||||||
|
private GetCSV() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
|
||||||
|
String modelClass) throws IOException, ClassNotFoundException {
|
||||||
|
getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath,
|
||||||
|
String modelClass, char delimiter) throws IOException, ClassNotFoundException {
|
||||||
|
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
|
}
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
try (BufferedWriter writer = new BufferedWriter(
|
||||||
|
new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
final List lines = new CsvToBeanBuilder(reader)
|
||||||
|
.withType(Class.forName(modelClass))
|
||||||
|
.withSeparator(delimiter)
|
||||||
|
.build()
|
||||||
|
.parse();
|
||||||
|
|
||||||
|
for (Object line : lines) {
|
||||||
|
writer.write(mapper.writeValueAsString(line));
|
||||||
|
writer.newLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,94 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bundles the http connection parameters driving the client behaviour.
|
||||||
|
*/
|
||||||
|
public class HttpClientParams {
|
||||||
|
|
||||||
|
// Defaults
|
||||||
|
public static int _maxNumberOfRetry = 3;
|
||||||
|
public static int _requestDelay = 0; // milliseconds
|
||||||
|
public static int _retryDelay = 10; // seconds
|
||||||
|
public static int _connectTimeOut = 10; // seconds
|
||||||
|
public static int _readTimeOut = 30; // seconds
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of allowed retires before failing
|
||||||
|
*/
|
||||||
|
private int maxNumberOfRetry;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delay between request (Milliseconds)
|
||||||
|
*/
|
||||||
|
private int requestDelay;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Time to wait after a failure before retrying (Seconds)
|
||||||
|
*/
|
||||||
|
private int retryDelay;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Connect timeout (Seconds)
|
||||||
|
*/
|
||||||
|
private int connectTimeOut;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read timeout (Seconds)
|
||||||
|
*/
|
||||||
|
private int readTimeOut;
|
||||||
|
|
||||||
|
public HttpClientParams() {
|
||||||
|
this(_maxNumberOfRetry, _requestDelay, _retryDelay, _connectTimeOut, _readTimeOut);
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpClientParams(int maxNumberOfRetry, int requestDelay, int retryDelay, int connectTimeOut,
|
||||||
|
int readTimeOut) {
|
||||||
|
this.maxNumberOfRetry = maxNumberOfRetry;
|
||||||
|
this.requestDelay = requestDelay;
|
||||||
|
this.retryDelay = retryDelay;
|
||||||
|
this.connectTimeOut = connectTimeOut;
|
||||||
|
this.readTimeOut = readTimeOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxNumberOfRetry() {
|
||||||
|
return maxNumberOfRetry;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMaxNumberOfRetry(int maxNumberOfRetry) {
|
||||||
|
this.maxNumberOfRetry = maxNumberOfRetry;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getRequestDelay() {
|
||||||
|
return requestDelay;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRequestDelay(int requestDelay) {
|
||||||
|
this.requestDelay = requestDelay;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getRetryDelay() {
|
||||||
|
return retryDelay;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRetryDelay(int retryDelay) {
|
||||||
|
this.retryDelay = retryDelay;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConnectTimeOut(int connectTimeOut) {
|
||||||
|
this.connectTimeOut = connectTimeOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getConnectTimeOut() {
|
||||||
|
return connectTimeOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getReadTimeOut() {
|
||||||
|
return readTimeOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setReadTimeOut(int readTimeOut) {
|
||||||
|
this.readTimeOut = readTimeOut;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,263 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
|
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.net.*;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.math.NumberUtils;
|
||||||
|
import org.apache.http.HttpHeaders;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.Constants;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
||||||
|
*
|
||||||
|
* @author jochen, michele, andrea, alessia, claudio, andreas
|
||||||
|
*/
|
||||||
|
public class HttpConnector2 {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(HttpConnector2.class);
|
||||||
|
|
||||||
|
private static final String REPORT_PREFIX = "http:";
|
||||||
|
|
||||||
|
private HttpClientParams clientParams;
|
||||||
|
|
||||||
|
private String responseType = null;
|
||||||
|
|
||||||
|
private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
||||||
|
|
||||||
|
public HttpConnector2() {
|
||||||
|
this(new HttpClientParams());
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpConnector2(HttpClientParams clientParams) {
|
||||||
|
this.clientParams = clientParams;
|
||||||
|
CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
|
||||||
|
*/
|
||||||
|
public InputStream getInputSourceAsStream(final String requestUrl) throws CollectorException {
|
||||||
|
return IOUtils.toInputStream(getInputSource(requestUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see HttpConnector2#getInputSource(java.lang.String, AggregatorReport)
|
||||||
|
*/
|
||||||
|
public String getInputSource(final String requestUrl) throws CollectorException {
|
||||||
|
return attemptDownloadAsString(requestUrl, 1, new AggregatorReport());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given the URL returns the content via HTTP GET
|
||||||
|
*
|
||||||
|
* @param requestUrl the URL
|
||||||
|
* @param report the list of errors
|
||||||
|
* @return the content of the downloaded resource
|
||||||
|
* @throws CollectorException when retrying more than maxNumberOfRetry times
|
||||||
|
*/
|
||||||
|
public String getInputSource(final String requestUrl, AggregatorReport report)
|
||||||
|
throws CollectorException {
|
||||||
|
return attemptDownloadAsString(requestUrl, 1, report);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String attemptDownloadAsString(final String requestUrl, final int retryNumber,
|
||||||
|
final AggregatorReport report) throws CollectorException {
|
||||||
|
|
||||||
|
try (InputStream s = attemptDownload(requestUrl, retryNumber, report)) {
|
||||||
|
return IOUtils.toString(s);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
throw new CollectorException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private InputStream attemptDownload(final String requestUrl, final int retryNumber,
|
||||||
|
final AggregatorReport report) throws CollectorException, IOException {
|
||||||
|
|
||||||
|
if (retryNumber > getClientParams().getMaxNumberOfRetry()) {
|
||||||
|
final String msg = String
|
||||||
|
.format(
|
||||||
|
"Max number of retries (%s/%s) exceeded, failing.",
|
||||||
|
retryNumber, getClientParams().getMaxNumberOfRetry());
|
||||||
|
log.error(msg);
|
||||||
|
throw new CollectorException(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Request attempt {} [{}]", retryNumber, requestUrl);
|
||||||
|
|
||||||
|
InputStream input = null;
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (getClientParams().getRequestDelay() > 0) {
|
||||||
|
backoffAndSleep(getClientParams().getRequestDelay());
|
||||||
|
}
|
||||||
|
final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection();
|
||||||
|
urlConn.setInstanceFollowRedirects(false);
|
||||||
|
urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000);
|
||||||
|
urlConn.setConnectTimeout(getClientParams().getConnectTimeOut() * 1000);
|
||||||
|
urlConn.addRequestProperty(HttpHeaders.USER_AGENT, userAgent);
|
||||||
|
|
||||||
|
if (log.isDebugEnabled()) {
|
||||||
|
logHeaderFields(urlConn);
|
||||||
|
}
|
||||||
|
|
||||||
|
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||||
|
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
|
||||||
|
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
|
||||||
|
|
||||||
|
if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) {
|
||||||
|
if (retryAfter > 0) {
|
||||||
|
backoffAndSleep(retryAfter);
|
||||||
|
} else {
|
||||||
|
backoffAndSleep(1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is2xx(urlConn.getResponseCode())) {
|
||||||
|
input = urlConn.getInputStream();
|
||||||
|
responseType = urlConn.getContentType();
|
||||||
|
return input;
|
||||||
|
}
|
||||||
|
if (is3xx(urlConn.getResponseCode())) {
|
||||||
|
// REDIRECTS
|
||||||
|
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
||||||
|
log.info("The requested url has been moved to {}", newUrl);
|
||||||
|
report
|
||||||
|
.put(
|
||||||
|
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||||
|
String.format("Moved to: %s", newUrl));
|
||||||
|
urlConn.disconnect();
|
||||||
|
if (retryAfter > 0) {
|
||||||
|
backoffAndSleep(retryAfter);
|
||||||
|
}
|
||||||
|
return attemptDownload(newUrl, retryNumber + 1, report);
|
||||||
|
}
|
||||||
|
if (is4xx(urlConn.getResponseCode()) || is5xx(urlConn.getResponseCode())) {
|
||||||
|
switch (urlConn.getResponseCode()) {
|
||||||
|
case HttpURLConnection.HTTP_NOT_FOUND:
|
||||||
|
case HttpURLConnection.HTTP_BAD_GATEWAY:
|
||||||
|
case HttpURLConnection.HTTP_UNAVAILABLE:
|
||||||
|
case HttpURLConnection.HTTP_GATEWAY_TIMEOUT:
|
||||||
|
if (retryAfter > 0) {
|
||||||
|
log
|
||||||
|
.warn(
|
||||||
|
"{} - waiting and repeating request after suggested retry-after {} sec.",
|
||||||
|
requestUrl, retryAfter);
|
||||||
|
backoffAndSleep(retryAfter * 1000);
|
||||||
|
} else {
|
||||||
|
log
|
||||||
|
.warn(
|
||||||
|
"{} - waiting and repeating request after default delay of {} sec.",
|
||||||
|
requestUrl, getClientParams().getRetryDelay());
|
||||||
|
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
|
||||||
|
}
|
||||||
|
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
||||||
|
urlConn.disconnect();
|
||||||
|
return attemptDownload(requestUrl, retryNumber + 1, report);
|
||||||
|
default:
|
||||||
|
report
|
||||||
|
.put(
|
||||||
|
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"%s Error: %s", requestUrl, urlConn.getResponseMessage()));
|
||||||
|
throw new CollectorException(urlConn.getResponseCode() + " error " + report);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new CollectorException(
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"Unexpected status code: %s errors: %s", urlConn.getResponseCode(),
|
||||||
|
MAPPER.writeValueAsString(report)));
|
||||||
|
} catch (MalformedURLException | UnknownHostException e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
|
throw new CollectorException(e.getMessage(), e);
|
||||||
|
} catch (SocketTimeoutException | SocketException e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
|
backoffAndSleep(getClientParams().getRetryDelay() * retryNumber * 1000);
|
||||||
|
return attemptDownload(requestUrl, retryNumber + 1, report);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
||||||
|
log.debug("StatusCode: {}", urlConn.getResponseMessage());
|
||||||
|
|
||||||
|
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
||||||
|
if (e.getKey() != null) {
|
||||||
|
for (String v : e.getValue()) {
|
||||||
|
log.debug(" key: {} - value: {}", e.getKey(), v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void backoffAndSleep(int sleepTimeMs) throws CollectorException {
|
||||||
|
log.info("I'm going to sleep for {}ms", sleepTimeMs);
|
||||||
|
try {
|
||||||
|
Thread.sleep(sleepTimeMs);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
throw new CollectorException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
||||||
|
for (String key : headerMap.keySet()) {
|
||||||
|
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
|
||||||
|
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
||||||
|
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String obtainNewLocation(final Map<String, List<String>> headerMap) throws CollectorException {
|
||||||
|
for (String key : headerMap.keySet()) {
|
||||||
|
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.LOCATION) && (headerMap.get(key).size() > 0)) {
|
||||||
|
return headerMap.get(key).get(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new CollectorException("The requested url has been MOVED, but 'location' param is MISSING");
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean is2xx(final int statusCode) {
|
||||||
|
return statusCode >= 200 && statusCode <= 299;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean is4xx(final int statusCode) {
|
||||||
|
return statusCode >= 400 && statusCode <= 499;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean is3xx(final int statusCode) {
|
||||||
|
return statusCode >= 300 && statusCode <= 399;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean is5xx(final int statusCode) {
|
||||||
|
return statusCode >= 500 && statusCode <= 599;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getResponseType() {
|
||||||
|
return responseType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpClientParams getClientParams() {
|
||||||
|
return clientParams;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setClientParams(HttpClientParams clientParams) {
|
||||||
|
this.clientParams = clientParams;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,75 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.rest;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.client.methods.HttpPost;
|
||||||
|
import org.apache.http.client.methods.HttpUriRequest;
|
||||||
|
import org.apache.http.entity.StringEntity;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
public class DNetRestClient {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(DNetRestClient.class);
|
||||||
|
|
||||||
|
private static final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
private DNetRestClient() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
|
||||||
|
final HttpGet httpGet = new HttpGet(url);
|
||||||
|
return doHTTPRequest(httpGet, clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String doGET(final String url) throws IOException {
|
||||||
|
final HttpGet httpGet = new HttpGet(url);
|
||||||
|
return doHTTPRequest(httpGet);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <V> String doPOST(final String url, V objParam) throws IOException {
|
||||||
|
final HttpPost httpPost = new HttpPost(url);
|
||||||
|
|
||||||
|
if (objParam != null) {
|
||||||
|
final StringEntity entity = new StringEntity(mapper.writeValueAsString(objParam));
|
||||||
|
httpPost.setEntity(entity);
|
||||||
|
httpPost.setHeader("Accept", "application/json");
|
||||||
|
httpPost.setHeader("Content-type", "application/json");
|
||||||
|
}
|
||||||
|
return doHTTPRequest(httpPost);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
|
||||||
|
return mapper.readValue(doPOST(url, objParam), clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
|
||||||
|
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
|
||||||
|
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
|
||||||
|
log
|
||||||
|
.info(
|
||||||
|
"request headers: {}",
|
||||||
|
Arrays
|
||||||
|
.asList(r.getAllHeaders())
|
||||||
|
.stream()
|
||||||
|
.map(h -> h.getName() + ":" + h.getValue())
|
||||||
|
.collect(Collectors.joining(",")));
|
||||||
|
|
||||||
|
return IOUtils.toString(client.execute(r).getEntity().getContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
|
||||||
|
return mapper.readValue(doHTTPRequest(r), clazz);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.common.vocabulary;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -10,8 +10,8 @@ import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
|
|
||||||
public class Vocabulary implements Serializable {
|
public class Vocabulary implements Serializable {
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTerm(final String id) {
|
public VocabularyTerm getTerm(final String id) {
|
||||||
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
|
return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void addTerm(final String id, final String name) {
|
protected void addTerm(final String id, final String name) {
|
||||||
|
@ -81,7 +81,6 @@ public class Vocabulary implements Serializable {
|
||||||
.ofNullable(getTermBySynonym(syn))
|
.ofNullable(getTermBySynonym(syn))
|
||||||
.map(term -> getTermAsQualifier(term.getId()))
|
.map(term -> getTermAsQualifier(term.getId()))
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
// .orElse(OafMapperUtils.unknown(getId(), getName()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.common.vocabulary;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
@ -7,8 +7,8 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.OafMapperUtils;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
|
@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
vocs.addTerm(vocId, termId, termName);
|
vocs.addTerm(vocId, termId, termName);
|
||||||
// vocs.addSynonyms(vocId, termId, termId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable {
|
||||||
final String syn = arr[2].trim();
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
vocs.addSynonyms(vocId, termId, syn);
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
// vocs.addSynonyms(vocId, termId, termId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,6 +65,10 @@ public class VocabularyGroup implements Serializable {
|
||||||
|
|
||||||
private final Map<String, Vocabulary> vocs = new HashMap<>();
|
private final Map<String, Vocabulary> vocs = new HashMap<>();
|
||||||
|
|
||||||
|
public Set<String> vocabularyNames() {
|
||||||
|
return vocs.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
public void addVocabulary(final String id, final String name) {
|
public void addVocabulary(final String id, final String name) {
|
||||||
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
|
vocs.put(id.toLowerCase(), new Vocabulary(id, name));
|
||||||
}
|
}
|
||||||
|
@ -94,7 +96,7 @@ public class VocabularyGroup implements Serializable {
|
||||||
.getTerms()
|
.getTerms()
|
||||||
.values()
|
.values()
|
||||||
.stream()
|
.stream()
|
||||||
.map(t -> t.getId())
|
.map(VocabularyTerm::getId)
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,7 +120,31 @@ public class VocabularyGroup implements Serializable {
|
||||||
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
return vocs.get(vocId.toLowerCase()).getSynonymAsQualifier(syn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* getSynonymAsQualifierCaseSensitive
|
||||||
|
*
|
||||||
|
* refelects the situation to check caseSensitive vocabulary
|
||||||
|
*/
|
||||||
|
public Qualifier getSynonymAsQualifierCaseSensitive(final String vocId, final String syn) {
|
||||||
|
if (StringUtils.isBlank(vocId)) {
|
||||||
|
return OafMapperUtils.unknown("", "");
|
||||||
|
}
|
||||||
|
return vocs.get(vocId).getSynonymAsQualifier(syn);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* termExists
|
||||||
|
*
|
||||||
|
* two methods: without and with caseSensitive check
|
||||||
|
*/
|
||||||
public boolean termExists(final String vocId, final String id) {
|
public boolean termExists(final String vocId, final String id) {
|
||||||
|
return termExists(vocId, id, Boolean.FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean termExists(final String vocId, final String id, final Boolean caseSensitive) {
|
||||||
|
if (Boolean.TRUE.equals(caseSensitive)) {
|
||||||
|
return vocabularyExists(vocId) && vocs.get(vocId).termExists(id);
|
||||||
|
}
|
||||||
return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id);
|
return vocabularyExists(vocId) && vocs.get(vocId.toLowerCase()).termExists(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,16 +152,19 @@ public class VocabularyGroup implements Serializable {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(String::toLowerCase)
|
.map(String::toLowerCase)
|
||||||
.map(id -> vocs.containsKey(id))
|
.map(vocs::containsKey)
|
||||||
.orElse(false);
|
.orElse(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
||||||
String id = Optional
|
String id = Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(s -> s.toLowerCase())
|
.map(String::toLowerCase)
|
||||||
.orElseThrow(
|
.orElseThrow(
|
||||||
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
|
() -> new IllegalArgumentException(
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(vocs.get(id))
|
.ofNullable(vocs.get(id))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.raw.common;
|
package eu.dnetlib.dhp.common.vocabulary;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.message;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class Message implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 401753881204524893L;
|
||||||
|
|
||||||
|
public static final String CURRENT_PARAM = "current";
|
||||||
|
public static final String TOTAL_PARAM = "total";
|
||||||
|
|
||||||
|
private MessageType messageType;
|
||||||
|
|
||||||
|
private String workflowId;
|
||||||
|
|
||||||
|
private Map<String, String> body;
|
||||||
|
|
||||||
|
public Message() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public Message(final MessageType messageType, final String workflowId) {
|
||||||
|
this(messageType, workflowId, new LinkedHashMap<>());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Message(final MessageType messageType, final String workflowId, final Map<String, String> body) {
|
||||||
|
this.messageType = messageType;
|
||||||
|
this.workflowId = workflowId;
|
||||||
|
this.body = body;
|
||||||
|
}
|
||||||
|
|
||||||
|
public MessageType getMessageType() {
|
||||||
|
return messageType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setMessageType(MessageType messageType) {
|
||||||
|
this.messageType = messageType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getWorkflowId() {
|
||||||
|
return workflowId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setWorkflowId(final String workflowId) {
|
||||||
|
this.workflowId = workflowId;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, String> getBody() {
|
||||||
|
return body;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setBody(final Map<String, String> body) {
|
||||||
|
this.body = body;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("Message [type=%s, workflowId=%s, body=%s]", messageType, workflowId, body);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,94 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.message;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
|
import org.apache.http.client.config.RequestConfig;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpPut;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
import org.apache.http.entity.StringEntity;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
public class MessageSender {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MessageSender.class);
|
||||||
|
|
||||||
|
private static final int SOCKET_TIMEOUT_MS = 2000;
|
||||||
|
|
||||||
|
private static final int CONNECTION_REQUEST_TIMEOUT_MS = 2000;
|
||||||
|
|
||||||
|
private static final int CONNTECTION_TIMEOUT_MS = 2000;
|
||||||
|
|
||||||
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
|
||||||
|
private final String dnetMessageEndpoint;
|
||||||
|
|
||||||
|
private final String workflowId;
|
||||||
|
|
||||||
|
private final ExecutorService executorService = Executors.newCachedThreadPool();
|
||||||
|
|
||||||
|
public MessageSender(final String dnetMessageEndpoint, final String workflowId) {
|
||||||
|
this.workflowId = workflowId;
|
||||||
|
this.dnetMessageEndpoint = dnetMessageEndpoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void sendMessage(final Message message) {
|
||||||
|
executorService.submit(() -> _sendMessage(message));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void sendMessage(final Long current, final Long total) {
|
||||||
|
sendMessage(createOngoingMessage(current, total));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void sendReport(final Map<String, String> report) {
|
||||||
|
sendMessage(new Message(MessageType.REPORT, workflowId, report));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Message createOngoingMessage(final Long current, final Long total) {
|
||||||
|
final Message m = new Message(MessageType.ONGOING, workflowId);
|
||||||
|
m.getBody().put(Message.CURRENT_PARAM, current.toString());
|
||||||
|
if (total != null) {
|
||||||
|
m.getBody().put(Message.TOTAL_PARAM, total.toString());
|
||||||
|
}
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void _sendMessage(final Message message) {
|
||||||
|
try {
|
||||||
|
final String json = objectMapper.writeValueAsString(message);
|
||||||
|
|
||||||
|
final HttpPut req = new HttpPut(dnetMessageEndpoint);
|
||||||
|
req.setEntity(new StringEntity(json, ContentType.APPLICATION_JSON));
|
||||||
|
|
||||||
|
final RequestConfig requestConfig = RequestConfig
|
||||||
|
.custom()
|
||||||
|
.setConnectTimeout(CONNTECTION_TIMEOUT_MS)
|
||||||
|
.setConnectionRequestTimeout(CONNECTION_REQUEST_TIMEOUT_MS)
|
||||||
|
.setSocketTimeout(SOCKET_TIMEOUT_MS)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients
|
||||||
|
.custom()
|
||||||
|
.setDefaultRequestConfig(requestConfig)
|
||||||
|
.build();
|
||||||
|
final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
log.debug("Sent Message to " + dnetMessageEndpoint);
|
||||||
|
log.debug("MESSAGE:" + message);
|
||||||
|
} catch (final Throwable e) {
|
||||||
|
log.error("Error sending message to " + dnetMessageEndpoint + ", message content: " + message, e);
|
||||||
|
}
|
||||||
|
} catch (final JsonProcessingException e) {
|
||||||
|
log.error("Error sending message to " + dnetMessageEndpoint + ", message content: " + message, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,21 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.message;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
public enum MessageType implements Serializable {
|
||||||
|
|
||||||
|
ONGOING, REPORT;
|
||||||
|
|
||||||
|
public MessageType from(String value) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(value)
|
||||||
|
.map(StringUtils::upperCase)
|
||||||
|
.map(MessageType::valueOf)
|
||||||
|
.orElseThrow(() -> new IllegalArgumentException("unknown message type: " + value));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,121 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.model.mdstore;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
|
||||||
|
|
||||||
/** This class models a record inside the new Metadata store collection on HDFS * */
|
|
||||||
public class MetadataRecord implements Serializable {
|
|
||||||
|
|
||||||
/** The D-Net Identifier associated to the record */
|
|
||||||
private String id;
|
|
||||||
|
|
||||||
/** The original Identifier of the record */
|
|
||||||
private String originalId;
|
|
||||||
|
|
||||||
/** The encoding of the record, should be JSON or XML */
|
|
||||||
private String encoding;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The information about the provenance of the record see @{@link Provenance} for the model of this information
|
|
||||||
*/
|
|
||||||
private Provenance provenance;
|
|
||||||
|
|
||||||
/** The content of the metadata */
|
|
||||||
private String body;
|
|
||||||
|
|
||||||
/** the date when the record has been stored */
|
|
||||||
private long dateOfCollection;
|
|
||||||
|
|
||||||
/** the date when the record has been stored */
|
|
||||||
private long dateOfTransformation;
|
|
||||||
|
|
||||||
public MetadataRecord() {
|
|
||||||
this.dateOfCollection = System.currentTimeMillis();
|
|
||||||
}
|
|
||||||
|
|
||||||
public MetadataRecord(
|
|
||||||
String originalId,
|
|
||||||
String encoding,
|
|
||||||
Provenance provenance,
|
|
||||||
String body,
|
|
||||||
long dateOfCollection) {
|
|
||||||
|
|
||||||
this.originalId = originalId;
|
|
||||||
this.encoding = encoding;
|
|
||||||
this.provenance = provenance;
|
|
||||||
this.body = body;
|
|
||||||
this.dateOfCollection = dateOfCollection;
|
|
||||||
this.id = DHPUtils.generateIdentifier(originalId, this.provenance.getNsPrefix());
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getOriginalId() {
|
|
||||||
return originalId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setOriginalId(String originalId) {
|
|
||||||
this.originalId = originalId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getEncoding() {
|
|
||||||
return encoding;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setEncoding(String encoding) {
|
|
||||||
this.encoding = encoding;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Provenance getProvenance() {
|
|
||||||
return provenance;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setProvenance(Provenance provenance) {
|
|
||||||
this.provenance = provenance;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getBody() {
|
|
||||||
return body;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setBody(String body) {
|
|
||||||
this.body = body;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getDateOfCollection() {
|
|
||||||
return dateOfCollection;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDateOfCollection(long dateOfCollection) {
|
|
||||||
this.dateOfCollection = dateOfCollection;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getDateOfTransformation() {
|
|
||||||
return dateOfTransformation;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDateOfTransformation(long dateOfTransformation) {
|
|
||||||
this.dateOfTransformation = dateOfTransformation;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if (!(o instanceof MetadataRecord)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return ((MetadataRecord) o).getId().equalsIgnoreCase(id);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return id.hashCode();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,52 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.model.mdstore;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author Sandro La Bruzzo
|
|
||||||
* <p>
|
|
||||||
* Provenace class models the provenance of the record in the metadataStore It contains the identifier and the
|
|
||||||
* name of the datasource that gives the record
|
|
||||||
*/
|
|
||||||
public class Provenance implements Serializable {
|
|
||||||
|
|
||||||
private String datasourceId;
|
|
||||||
|
|
||||||
private String datasourceName;
|
|
||||||
|
|
||||||
private String nsPrefix;
|
|
||||||
|
|
||||||
public Provenance() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public Provenance(String datasourceId, String datasourceName, String nsPrefix) {
|
|
||||||
this.datasourceId = datasourceId;
|
|
||||||
this.datasourceName = datasourceName;
|
|
||||||
this.nsPrefix = nsPrefix;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDatasourceId() {
|
|
||||||
return datasourceId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDatasourceId(String datasourceId) {
|
|
||||||
this.datasourceId = datasourceId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getDatasourceName() {
|
|
||||||
return datasourceName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setDatasourceName(String datasourceName) {
|
|
||||||
this.datasourceName = datasourceName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getNsPrefix() {
|
|
||||||
return nsPrefix;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setNsPrefix(String nsPrefix) {
|
|
||||||
this.nsPrefix = nsPrefix;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
@ -19,6 +18,9 @@ public class AuthorMerger {
|
||||||
|
|
||||||
private static final Double THRESHOLD = 0.95;
|
private static final Double THRESHOLD = 0.95;
|
||||||
|
|
||||||
|
private AuthorMerger() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Author> merge(List<List<Author>> authors) {
|
public static List<Author> merge(List<List<Author>> authors) {
|
||||||
|
|
||||||
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
||||||
|
@ -36,7 +38,8 @@ public class AuthorMerger {
|
||||||
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
|
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
|
||||||
int pa = countAuthorsPids(a);
|
int pa = countAuthorsPids(a);
|
||||||
int pb = countAuthorsPids(b);
|
int pb = countAuthorsPids(b);
|
||||||
List<Author> base, enrich;
|
List<Author> base;
|
||||||
|
List<Author> enrich;
|
||||||
int sa = authorsSize(a);
|
int sa = authorsSize(a);
|
||||||
int sb = authorsSize(b);
|
int sb = authorsSize(b);
|
||||||
|
|
||||||
|
@ -62,22 +65,24 @@ public class AuthorMerger {
|
||||||
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
||||||
final Map<String, Author> basePidAuthorMap = base
|
final Map<String, Author> basePidAuthorMap = base
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.flatMap(
|
.flatMap(
|
||||||
a -> a
|
a -> a
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
|
.map(p -> new Tuple2<>(pidToComparableString(p), a)))
|
||||||
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
|
.collect(Collectors.toMap(Tuple2::_1, Tuple2::_2, (x1, x2) -> x1));
|
||||||
|
|
||||||
// <pid, Author> (list of pid that are missing in the other list)
|
// <pid, Author> (list of pid that are missing in the other list)
|
||||||
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.flatMap(
|
.flatMap(
|
||||||
a -> a
|
a -> a
|
||||||
.getPid()
|
.getPid()
|
||||||
.stream()
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
|
.filter(p -> !basePidAuthorMap.containsKey(pidToComparableString(p)))
|
||||||
.map(p -> new Tuple2<>(p, a)))
|
.map(p -> new Tuple2<>(p, a)))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
@ -115,11 +120,17 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
|
<<<<<<< HEAD
|
||||||
if (pid == null)
|
if (pid == null)
|
||||||
return "";
|
return "";
|
||||||
return (pid.getQualifier() != null
|
return (pid.getQualifier() != null
|
||||||
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
||||||
: "")
|
: "")
|
||||||
|
=======
|
||||||
|
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
||||||
|
: "";
|
||||||
|
return (pid.getQualifier() != null ? classid : "")
|
||||||
|
>>>>>>> upstream/master
|
||||||
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,7 +163,7 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean hasPid(Author a) {
|
private static boolean hasPid(Author a) {
|
||||||
if (a == null || a.getPid() == null || a.getPid().size() == 0)
|
if (a == null || a.getPid() == null || a.getPid().isEmpty())
|
||||||
return false;
|
return false;
|
||||||
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
|
return a.getPid().stream().anyMatch(p -> p != null && StringUtils.isNotBlank(p.getValue()));
|
||||||
}
|
}
|
||||||
|
@ -161,7 +172,10 @@ public class AuthorMerger {
|
||||||
if (StringUtils.isNotBlank(author.getSurname())) {
|
if (StringUtils.isNotBlank(author.getSurname())) {
|
||||||
return new Person(author.getSurname() + ", " + author.getName(), false);
|
return new Person(author.getSurname() + ", " + author.getName(), false);
|
||||||
} else {
|
} else {
|
||||||
return new Person(author.getFullname(), false);
|
if (StringUtils.isNotBlank(author.getFullname()))
|
||||||
|
return new Person(author.getFullname(), false);
|
||||||
|
else
|
||||||
|
return new Person("", false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
|
||||||
/** Created by sandro on 9/29/16. */
|
/** Created by sandro on 9/29/16. */
|
||||||
public class VtdUtilityParser {
|
public class VtdUtilityParser {
|
||||||
|
|
||||||
|
private VtdUtilityParser() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Node> getTextValuesWithAttributes(
|
public static List<Node> getTextValuesWithAttributes(
|
||||||
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
||||||
throws VtdException {
|
throws VtdException {
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
|
||||||
|
|
||||||
public class ResultTypeComparator implements Comparator<Result> {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compare(Result left, Result right) {
|
|
||||||
|
|
||||||
if (left == null && right == null)
|
|
||||||
return 0;
|
|
||||||
if (left == null)
|
|
||||||
return 1;
|
|
||||||
if (right == null)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
String lClass = left.getResulttype().getClassid();
|
|
||||||
String rClass = right.getResulttype().getClassid();
|
|
||||||
|
|
||||||
if (lClass.equals(rClass))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (lClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals(ModelConstants.SOFTWARE_RESULTTYPE_CLASSID))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals(ModelConstants.ORP_RESULTTYPE_CLASSID))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
// Else (but unlikely), lexicographical ordering will do.
|
|
||||||
return lClass.compareTo(rClass);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,33 +1,37 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.oa.graph.clean;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.ZoneId;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.time.format.DateTimeParseException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.clearspring.analytics.util.Lists;
|
import com.github.sisyphsu.dateparser.DateParserUtils;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.oa.graph.raw.AbstractMdRecordToOafMapper;
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
import me.xuender.unidecode.Unidecode;
|
||||||
|
|
||||||
public class CleaningFunctions {
|
public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
|
|
||||||
public static final String DOI_PREFIX_REGEX = "^10\\.";
|
|
||||||
|
|
||||||
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
|
public static final String ORCID_CLEANING_REGEX = ".*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9]{4}).*[-–—−=].*([0-9x]{4})";
|
||||||
public static final int ORCID_LEN = 19;
|
public static final int ORCID_LEN = 19;
|
||||||
|
|
||||||
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
public static final String CLEANING_REGEX = "(?:\\n|\\r|\\t)";
|
||||||
|
public static final String INVALID_AUTHOR_REGEX = ".*deactivated.*";
|
||||||
|
|
||||||
public static final Set<String> PID_BLACKLIST = new HashSet<>();
|
public static final String TITLE_TEST = "test";
|
||||||
|
public static final String TITLE_FILTER_REGEX = String.format("(%s)|\\W|\\d", TITLE_TEST);
|
||||||
|
|
||||||
static {
|
public static final int TITLE_FILTER_RESIDUAL_LENGTH = 5;
|
||||||
PID_BLACKLIST.add("none");
|
|
||||||
PID_BLACKLIST.add("na");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
public static <T extends Oaf> T fixVocabularyNames(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
|
@ -59,23 +63,17 @@ public class CleaningFunctions {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getAuthor())) {
|
if (Objects.nonNull(r.getAuthor())) {
|
||||||
r
|
r.getAuthor().stream().filter(Objects::nonNull).forEach(a -> {
|
||||||
.getAuthor()
|
if (Objects.nonNull(a.getPid())) {
|
||||||
.stream()
|
a.getPid().stream().filter(Objects::nonNull).forEach(p -> {
|
||||||
.filter(Objects::nonNull)
|
fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES);
|
||||||
.forEach(a -> {
|
});
|
||||||
if (Objects.nonNull(a.getPid())) {
|
}
|
||||||
a
|
});
|
||||||
.getPid()
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.forEach(p -> fixVocabName(p.getQualifier(), ModelConstants.DNET_PID_TYPES));
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
if (value instanceof Publication) {
|
if (value instanceof Publication) {
|
||||||
|
|
||||||
} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
|
} else if (value instanceof Dataset) {
|
||||||
|
|
||||||
} else if (value instanceof OtherResearchProduct) {
|
} else if (value instanceof OtherResearchProduct) {
|
||||||
|
|
||||||
|
@ -87,7 +85,37 @@ public class CleaningFunctions {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T extends Oaf> T fixDefaults(T value) {
|
public static <T extends Oaf> boolean filter(T value) {
|
||||||
|
if (value instanceof Datasource) {
|
||||||
|
// nothing to evaluate here
|
||||||
|
} else if (value instanceof Project) {
|
||||||
|
// nothing to evaluate here
|
||||||
|
} else if (value instanceof Organization) {
|
||||||
|
// nothing to evaluate here
|
||||||
|
} else if (value instanceof Relation) {
|
||||||
|
// nothing to clean here
|
||||||
|
} else if (value instanceof Result) {
|
||||||
|
|
||||||
|
Result r = (Result) value;
|
||||||
|
|
||||||
|
if (Objects.isNull(r.getTitle()) || r.getTitle().isEmpty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value instanceof Publication) {
|
||||||
|
|
||||||
|
} else if (value instanceof Dataset) {
|
||||||
|
|
||||||
|
} else if (value instanceof OtherResearchProduct) {
|
||||||
|
|
||||||
|
} else if (value instanceof Software) {
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T extends Oaf> T cleanup(T value) {
|
||||||
if (value instanceof Datasource) {
|
if (value instanceof Datasource) {
|
||||||
// nothing to clean here
|
// nothing to clean here
|
||||||
} else if (value instanceof Project) {
|
} else if (value instanceof Project) {
|
||||||
|
@ -98,10 +126,44 @@ public class CleaningFunctions {
|
||||||
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
|
o.setCountry(ModelConstants.UNKNOWN_COUNTRY);
|
||||||
}
|
}
|
||||||
} else if (value instanceof Relation) {
|
} else if (value instanceof Relation) {
|
||||||
// nothing to clean here
|
Relation r = (Relation) value;
|
||||||
|
|
||||||
|
Optional<String> validationDate = doCleanDate(r.getValidationDate());
|
||||||
|
if (validationDate.isPresent()) {
|
||||||
|
r.setValidationDate(validationDate.get());
|
||||||
|
r.setValidated(true);
|
||||||
|
} else {
|
||||||
|
r.setValidationDate(null);
|
||||||
|
r.setValidated(false);
|
||||||
|
}
|
||||||
} else if (value instanceof Result) {
|
} else if (value instanceof Result) {
|
||||||
|
|
||||||
Result r = (Result) value;
|
Result r = (Result) value;
|
||||||
|
|
||||||
|
if (Objects.nonNull(r.getDateofacceptance())) {
|
||||||
|
Optional<String> date = cleanDateField(r.getDateofacceptance());
|
||||||
|
if (date.isPresent()) {
|
||||||
|
r.getDateofacceptance().setValue(date.get());
|
||||||
|
} else {
|
||||||
|
r.setDateofacceptance(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Objects.nonNull(r.getRelevantdate())) {
|
||||||
|
r
|
||||||
|
.setRelevantdate(
|
||||||
|
r
|
||||||
|
.getRelevantdate()
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
||||||
|
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
||||||
|
.map(sp -> {
|
||||||
|
sp.setValue(GraphCleaningFunctions.cleanDate(sp.getValue()));
|
||||||
|
return sp;
|
||||||
|
})
|
||||||
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
}
|
||||||
if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
|
if (Objects.nonNull(r.getPublisher()) && StringUtils.isBlank(r.getPublisher().getValue())) {
|
||||||
r.setPublisher(null);
|
r.setPublisher(null);
|
||||||
}
|
}
|
||||||
|
@ -110,16 +172,6 @@ public class CleaningFunctions {
|
||||||
.setLanguage(
|
.setLanguage(
|
||||||
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
|
qualifier("und", "Undetermined", ModelConstants.DNET_LANGUAGES));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getCountry())) {
|
|
||||||
r
|
|
||||||
.setCountry(
|
|
||||||
r
|
|
||||||
.getCountry()
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.filter(c -> StringUtils.isNotBlank(c.getClassid()))
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
|
||||||
if (Objects.nonNull(r.getSubject())) {
|
if (Objects.nonNull(r.getSubject())) {
|
||||||
r
|
r
|
||||||
.setSubject(
|
.setSubject(
|
||||||
|
@ -130,7 +182,7 @@ public class CleaningFunctions {
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
||||||
.map(CleaningFunctions::cleanValue)
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getTitle())) {
|
if (Objects.nonNull(r.getTitle())) {
|
||||||
|
@ -141,7 +193,23 @@ public class CleaningFunctions {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.map(CleaningFunctions::cleanValue)
|
.filter(
|
||||||
|
sp -> {
|
||||||
|
final String title = sp
|
||||||
|
.getValue()
|
||||||
|
.toLowerCase();
|
||||||
|
final String decoded = Unidecode.decode(title);
|
||||||
|
|
||||||
|
if (StringUtils.contains(decoded, TITLE_TEST)) {
|
||||||
|
return decoded
|
||||||
|
.replaceAll(TITLE_FILTER_REGEX, "")
|
||||||
|
.length() > TITLE_FILTER_RESIDUAL_LENGTH;
|
||||||
|
}
|
||||||
|
return !decoded
|
||||||
|
.replaceAll("\\W|\\d", "")
|
||||||
|
.isEmpty();
|
||||||
|
})
|
||||||
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getDescription())) {
|
if (Objects.nonNull(r.getDescription())) {
|
||||||
|
@ -152,22 +220,11 @@ public class CleaningFunctions {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
.filter(sp -> StringUtils.isNotBlank(sp.getValue()))
|
||||||
.map(CleaningFunctions::cleanValue)
|
.map(GraphCleaningFunctions::cleanValue)
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getPid())) {
|
if (Objects.nonNull(r.getPid())) {
|
||||||
r
|
r.setPid(processPidCleaning(r.getPid()));
|
||||||
.setPid(
|
|
||||||
r
|
|
||||||
.getPid()
|
|
||||||
.stream()
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
|
|
||||||
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
|
|
||||||
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
|
||||||
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
|
||||||
.map(CleaningFunctions::normalizePidValue)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
if (Objects.isNull(r.getResourcetype()) || StringUtils.isBlank(r.getResourcetype().getClassid())) {
|
||||||
r
|
r
|
||||||
|
@ -175,11 +232,32 @@ public class CleaningFunctions {
|
||||||
qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
|
qualifier(ModelConstants.UNKNOWN, "Unknown", ModelConstants.DNET_DATA_CITE_RESOURCE));
|
||||||
}
|
}
|
||||||
if (Objects.nonNull(r.getInstance())) {
|
if (Objects.nonNull(r.getInstance())) {
|
||||||
|
|
||||||
for (Instance i : r.getInstance()) {
|
for (Instance i : r.getInstance()) {
|
||||||
|
if (Objects.nonNull(i.getPid())) {
|
||||||
|
i.setPid(processPidCleaning(i.getPid()));
|
||||||
|
}
|
||||||
|
if (Objects.nonNull(i.getAlternateIdentifier())) {
|
||||||
|
i.setAlternateIdentifier(processPidCleaning(i.getAlternateIdentifier()));
|
||||||
|
}
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getPid())
|
||||||
|
.ifPresent(pid -> {
|
||||||
|
final Set<StructuredProperty> pids = Sets.newHashSet(pid);
|
||||||
|
Optional
|
||||||
|
.ofNullable(i.getAlternateIdentifier())
|
||||||
|
.ifPresent(altId -> {
|
||||||
|
final Set<StructuredProperty> altIds = Sets.newHashSet(altId);
|
||||||
|
i.setAlternateIdentifier(Lists.newArrayList(Sets.difference(altIds, pids)));
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
if (Objects.isNull(i.getAccessright()) || StringUtils.isBlank(i.getAccessright().getClassid())) {
|
||||||
i
|
i
|
||||||
.setAccessright(
|
.setAccessright(
|
||||||
qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
|
accessRight(
|
||||||
|
ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
|
||||||
|
ModelConstants.DNET_ACCESS_MODES));
|
||||||
}
|
}
|
||||||
if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
|
if (Objects.isNull(i.getHostedby()) || StringUtils.isBlank(i.getHostedby().getKey())) {
|
||||||
i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
|
i.setHostedby(ModelConstants.UNKNOWN_REPOSITORY);
|
||||||
|
@ -187,14 +265,24 @@ public class CleaningFunctions {
|
||||||
if (Objects.isNull(i.getRefereed())) {
|
if (Objects.isNull(i.getRefereed())) {
|
||||||
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
|
i.setRefereed(qualifier("0000", "Unknown", ModelConstants.DNET_REVIEW_LEVELS));
|
||||||
}
|
}
|
||||||
|
if (Objects.nonNull(i.getDateofacceptance())) {
|
||||||
|
Optional<String> date = cleanDateField(i.getDateofacceptance());
|
||||||
|
if (date.isPresent()) {
|
||||||
|
i.getDateofacceptance().setValue(date.get());
|
||||||
|
} else {
|
||||||
|
i.setDateofacceptance(null);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
|
if (Objects.isNull(r.getBestaccessright()) || StringUtils.isBlank(r.getBestaccessright().getClassid())) {
|
||||||
Qualifier bestaccessrights = AbstractMdRecordToOafMapper.createBestAccessRights(r.getInstance());
|
Qualifier bestaccessrights = OafMapperUtils.createBestAccessRights(r.getInstance());
|
||||||
if (Objects.isNull(bestaccessrights)) {
|
if (Objects.isNull(bestaccessrights)) {
|
||||||
r
|
r
|
||||||
.setBestaccessright(
|
.setBestaccessright(
|
||||||
qualifier(ModelConstants.UNKNOWN, "not available", ModelConstants.DNET_ACCESS_MODES));
|
qualifier(
|
||||||
|
ModelConstants.UNKNOWN, ModelConstants.NOT_AVAILABLE,
|
||||||
|
ModelConstants.DNET_ACCESS_MODES));
|
||||||
} else {
|
} else {
|
||||||
r.setBestaccessright(bestaccessrights);
|
r.setBestaccessright(bestaccessrights);
|
||||||
}
|
}
|
||||||
|
@ -205,7 +293,7 @@ public class CleaningFunctions {
|
||||||
r
|
r
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> Objects.nonNull(a))
|
.filter(Objects::nonNull)
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
@ -280,11 +368,10 @@ public class CleaningFunctions {
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (value instanceof Publication) {
|
if (value instanceof Publication) {
|
||||||
|
|
||||||
} else if (value instanceof eu.dnetlib.dhp.schema.oaf.Dataset) {
|
} else if (value instanceof Dataset) {
|
||||||
|
|
||||||
} else if (value instanceof OtherResearchProduct) {
|
} else if (value instanceof OtherResearchProduct) {
|
||||||
|
|
||||||
|
@ -296,6 +383,79 @@ public class CleaningFunctions {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Optional<String> cleanDateField(Field<String> dateofacceptance) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(dateofacceptance)
|
||||||
|
.map(Field::getValue)
|
||||||
|
.map(GraphCleaningFunctions::cleanDate)
|
||||||
|
.filter(Objects::nonNull);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static Optional<String> doCleanDate(String date) {
|
||||||
|
return Optional.ofNullable(cleanDate(date));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String cleanDate(final String inputDate) {
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(inputDate)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
final LocalDate date = DateParserUtils
|
||||||
|
.parseDate(inputDate.trim())
|
||||||
|
.toInstant()
|
||||||
|
.atZone(ZoneId.systemDefault())
|
||||||
|
.toLocalDate();
|
||||||
|
return DateTimeFormatter.ofPattern(ModelSupport.DATE_FORMAT).format(date);
|
||||||
|
} catch (DateTimeParseException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// HELPERS
|
||||||
|
|
||||||
|
private static boolean isValidAuthorName(Author a) {
|
||||||
|
return !Stream
|
||||||
|
.of(a.getFullname(), a.getName(), a.getSurname())
|
||||||
|
.filter(s -> s != null && !s.isEmpty())
|
||||||
|
.collect(Collectors.joining(""))
|
||||||
|
.toLowerCase()
|
||||||
|
.matches(INVALID_AUTHOR_REGEX);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<StructuredProperty> processPidCleaning(List<StructuredProperty> pids) {
|
||||||
|
return pids
|
||||||
|
.stream()
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.filter(sp -> StringUtils.isNotBlank(StringUtils.trim(sp.getValue())))
|
||||||
|
.filter(sp -> !PID_BLACKLIST.contains(sp.getValue().trim().toLowerCase()))
|
||||||
|
.filter(sp -> Objects.nonNull(sp.getQualifier()))
|
||||||
|
.filter(sp -> StringUtils.isNotBlank(sp.getQualifier().getClassid()))
|
||||||
|
.map(CleaningFunctions::normalizePidValue)
|
||||||
|
.filter(CleaningFunctions::pidFilter)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void fixVocabName(Qualifier q, String vocabularyName) {
|
||||||
|
if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
|
||||||
|
q.setSchemeid(vocabularyName);
|
||||||
|
q.setSchemename(vocabularyName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static AccessRight accessRight(String classid, String classname, String scheme) {
|
||||||
|
return OafMapperUtils
|
||||||
|
.accessRight(
|
||||||
|
classid, classname, scheme, scheme);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Qualifier qualifier(String classid, String classname, String scheme) {
|
||||||
|
return OafMapperUtils
|
||||||
|
.qualifier(
|
||||||
|
classid, classname, scheme, scheme);
|
||||||
|
}
|
||||||
|
|
||||||
protected static StructuredProperty cleanValue(StructuredProperty s) {
|
protected static StructuredProperty cleanValue(StructuredProperty s) {
|
||||||
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
|
s.setValue(s.getValue().replaceAll(CLEANING_REGEX, " "));
|
||||||
return s;
|
return s;
|
||||||
|
@ -306,39 +466,4 @@ public class CleaningFunctions {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
// HELPERS
|
|
||||||
|
|
||||||
private static void fixVocabName(Qualifier q, String vocabularyName) {
|
|
||||||
if (Objects.nonNull(q) && StringUtils.isBlank(q.getSchemeid())) {
|
|
||||||
q.setSchemeid(vocabularyName);
|
|
||||||
q.setSchemename(vocabularyName);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Qualifier qualifier(String classid, String classname, String scheme) {
|
|
||||||
return OafMapperUtils
|
|
||||||
.qualifier(
|
|
||||||
classid, classname, scheme, scheme);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Utility method that normalises PID values on a per-type basis.
|
|
||||||
* @param pid the PID whose value will be normalised.
|
|
||||||
* @return the PID containing the normalised value.
|
|
||||||
*/
|
|
||||||
public static StructuredProperty normalizePidValue(StructuredProperty pid) {
|
|
||||||
String value = Optional
|
|
||||||
.ofNullable(pid.getValue())
|
|
||||||
.map(String::trim)
|
|
||||||
.orElseThrow(() -> new IllegalArgumentException("PID value cannot be empty"));
|
|
||||||
switch (pid.getQualifier().getClassid()) {
|
|
||||||
|
|
||||||
// TODO add cleaning for more PID types as needed
|
|
||||||
case "doi":
|
|
||||||
pid.setValue(value.toLowerCase().replaceAll(DOI_PREFIX_REGEX, "10."));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return pid;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
|
@ -1,11 +1,9 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.schema.oaf;
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
@ -13,42 +11,48 @@ import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import eu.dnetlib.dhp.schema.common.AccessRightComparator;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class OafMapperUtils {
|
public class OafMapperUtils {
|
||||||
|
|
||||||
public static Oaf merge(final Oaf o1, final Oaf o2) {
|
private OafMapperUtils() {
|
||||||
if (ModelSupport.isSubClass(o1, OafEntity.class)) {
|
|
||||||
if (ModelSupport.isSubClass(o1, Result.class)) {
|
|
||||||
|
|
||||||
return mergeResults((Result) o1, (Result) o2);
|
|
||||||
} else if (ModelSupport.isSubClass(o1, Datasource.class)) {
|
|
||||||
((Datasource) o1).mergeFrom((Datasource) o2);
|
|
||||||
} else if (ModelSupport.isSubClass(o1, Organization.class)) {
|
|
||||||
((Organization) o1).mergeFrom((Organization) o2);
|
|
||||||
} else if (ModelSupport.isSubClass(o1, Project.class)) {
|
|
||||||
((Project) o1).mergeFrom((Project) o2);
|
|
||||||
} else {
|
|
||||||
throw new RuntimeException("invalid OafEntity subtype:" + o1.getClass().getCanonicalName());
|
|
||||||
}
|
|
||||||
} else if (ModelSupport.isSubClass(o1, Relation.class)) {
|
|
||||||
((Relation) o1).mergeFrom((Relation) o2);
|
|
||||||
} else {
|
|
||||||
throw new RuntimeException("invalid Oaf type:" + o1.getClass().getCanonicalName());
|
|
||||||
}
|
|
||||||
return o1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Result mergeResults(Result r1, Result r2) {
|
public static Oaf merge(final Oaf left, final Oaf right) {
|
||||||
if (new ResultTypeComparator().compare(r1, r2) < 0) {
|
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
||||||
r1.mergeFrom(r2);
|
return mergeEntities((OafEntity) left, (OafEntity) right);
|
||||||
return r1;
|
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
||||||
|
((Relation) left).mergeFrom((Relation) right);
|
||||||
} else {
|
} else {
|
||||||
r2.mergeFrom(r1);
|
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
||||||
return r2;
|
}
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static OafEntity mergeEntities(OafEntity left, OafEntity right) {
|
||||||
|
if (ModelSupport.isSubClass(left, Result.class)) {
|
||||||
|
return mergeResults((Result) left, (Result) right);
|
||||||
|
} else if (ModelSupport.isSubClass(left, Datasource.class)) {
|
||||||
|
left.mergeFrom(right);
|
||||||
|
} else if (ModelSupport.isSubClass(left, Organization.class)) {
|
||||||
|
left.mergeFrom(right);
|
||||||
|
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
||||||
|
left.mergeFrom(right);
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
||||||
|
}
|
||||||
|
return left;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Result mergeResults(Result left, Result right) {
|
||||||
|
if (new ResultTypeComparator().compare(left, right) < 0) {
|
||||||
|
left.mergeFrom(right);
|
||||||
|
return left;
|
||||||
|
} else {
|
||||||
|
right.mergeFrom(left);
|
||||||
|
return right;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,7 +65,7 @@ public class OafMapperUtils {
|
||||||
|
|
||||||
public static List<KeyValue> listKeyValues(final String... s) {
|
public static List<KeyValue> listKeyValues(final String... s) {
|
||||||
if (s.length % 2 > 0) {
|
if (s.length % 2 > 0) {
|
||||||
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)");
|
throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<KeyValue> list = new ArrayList<>();
|
final List<KeyValue> list = new ArrayList<>();
|
||||||
|
@ -87,7 +91,7 @@ public class OafMapperUtils {
|
||||||
.stream(values)
|
.stream(values)
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(distinctByKey(f -> f.getValue()))
|
.filter(distinctByKey(Field::getValue))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,7 +100,7 @@ public class OafMapperUtils {
|
||||||
.stream()
|
.stream()
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(distinctByKey(f -> f.getValue()))
|
.filter(distinctByKey(Field::getValue))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,6 +108,29 @@ public class OafMapperUtils {
|
||||||
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
return qualifier("UNKNOWN", "Unknown", schemeid, schemename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static AccessRight accessRight(
|
||||||
|
final String classid,
|
||||||
|
final String classname,
|
||||||
|
final String schemeid,
|
||||||
|
final String schemename) {
|
||||||
|
return accessRight(classid, classname, schemeid, schemename, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static AccessRight accessRight(
|
||||||
|
final String classid,
|
||||||
|
final String classname,
|
||||||
|
final String schemeid,
|
||||||
|
final String schemename,
|
||||||
|
final OpenAccessRoute openAccessRoute) {
|
||||||
|
final AccessRight accessRight = new AccessRight();
|
||||||
|
accessRight.setClassid(classid);
|
||||||
|
accessRight.setClassname(classname);
|
||||||
|
accessRight.setSchemeid(schemeid);
|
||||||
|
accessRight.setSchemename(schemename);
|
||||||
|
accessRight.setOpenAccessRoute(openAccessRoute);
|
||||||
|
return accessRight;
|
||||||
|
}
|
||||||
|
|
||||||
public static Qualifier qualifier(
|
public static Qualifier qualifier(
|
||||||
final String classid,
|
final String classid,
|
||||||
final String classname,
|
final String classname,
|
||||||
|
@ -117,6 +144,15 @@ public class OafMapperUtils {
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Qualifier qualifier(final Qualifier qualifier) {
|
||||||
|
final Qualifier q = new Qualifier();
|
||||||
|
q.setClassid(qualifier.getClassid());
|
||||||
|
q.setClassname(qualifier.getClassname());
|
||||||
|
q.setSchemeid(qualifier.getSchemeid());
|
||||||
|
q.setSchemename(qualifier.getSchemename());
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
public static StructuredProperty structuredProperty(
|
public static StructuredProperty structuredProperty(
|
||||||
final String value,
|
final String value,
|
||||||
final String classid,
|
final String classid,
|
||||||
|
@ -267,7 +303,7 @@ public class OafMapperUtils {
|
||||||
} else if (to_md5) {
|
} else if (to_md5) {
|
||||||
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
|
final String nsPrefix = StringUtils.substringBefore(originalId, "::");
|
||||||
final String rest = StringUtils.substringAfter(originalId, "::");
|
final String rest = StringUtils.substringAfter(originalId, "::");
|
||||||
return String.format("%s|%s::%s", prefix, nsPrefix, DHPUtils.md5(rest));
|
return String.format("%s|%s::%s", prefix, nsPrefix, IdentifierFactory.md5(rest));
|
||||||
} else {
|
} else {
|
||||||
return String.format("%s|%s", prefix, originalId);
|
return String.format("%s|%s", prefix, originalId);
|
||||||
}
|
}
|
||||||
|
@ -300,4 +336,36 @@ public class OafMapperUtils {
|
||||||
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
final Map<Object, Boolean> seen = new ConcurrentHashMap<>();
|
||||||
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
return t -> seen.putIfAbsent(keyExtractor.apply(t), Boolean.TRUE) == null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Qualifier createBestAccessRights(final List<Instance> instanceList) {
|
||||||
|
return getBestAccessRights(instanceList);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static Qualifier getBestAccessRights(final List<Instance> instanceList) {
|
||||||
|
if (instanceList != null) {
|
||||||
|
final Optional<AccessRight> min = instanceList
|
||||||
|
.stream()
|
||||||
|
.map(Instance::getAccessright)
|
||||||
|
.min(new AccessRightComparator<>());
|
||||||
|
|
||||||
|
final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(rights.getClassid())) {
|
||||||
|
rights.setClassid(UNKNOWN);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getClassname())
|
||||||
|
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
||||||
|
rights.setClassname(NOT_AVAILABLE);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getSchemeid())) {
|
||||||
|
rights.setSchemeid(DNET_ACCESS_MODES);
|
||||||
|
}
|
||||||
|
if (StringUtils.isBlank(rights.getSchemename())) {
|
||||||
|
rights.setSchemename(DNET_ACCESS_MODES);
|
||||||
|
}
|
||||||
|
|
||||||
|
return rights;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -1,26 +1,44 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.utils;
|
package eu.dnetlib.dhp.utils;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.*;
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.security.MessageDigest;
|
import java.security.MessageDigest;
|
||||||
import java.util.List;
|
import java.util.*;
|
||||||
import java.util.zip.GZIPInputStream;
|
import java.util.stream.Collectors;
|
||||||
import java.util.zip.GZIPOutputStream;
|
|
||||||
|
|
||||||
import org.apache.commons.codec.binary.Base64;
|
|
||||||
import org.apache.commons.codec.binary.Base64OutputStream;
|
|
||||||
import org.apache.commons.codec.binary.Hex;
|
import org.apache.commons.codec.binary.Hex;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpGet;
|
||||||
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClients;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.SaveMode;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
import com.jayway.jsonpath.JsonPath;
|
import com.jayway.jsonpath.JsonPath;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreWithInfo;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.utils.CleaningFunctions;
|
||||||
import net.minidev.json.JSONArray;
|
import net.minidev.json.JSONArray;
|
||||||
import scala.collection.JavaConverters;
|
import scala.collection.JavaConverters;
|
||||||
import scala.collection.Seq;
|
import scala.collection.Seq;
|
||||||
|
|
||||||
public class DHPUtils {
|
public class DHPUtils {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
||||||
|
|
||||||
|
private DHPUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
public static Seq<String> toSeq(List<String> list) {
|
public static Seq<String> toSeq(List<String> list) {
|
||||||
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
||||||
}
|
}
|
||||||
|
@ -31,40 +49,59 @@ public class DHPUtils {
|
||||||
md.update(s.getBytes(StandardCharsets.UTF_8));
|
md.update(s.getBytes(StandardCharsets.UTF_8));
|
||||||
return new String(Hex.encodeHex(md.digest()));
|
return new String(Hex.encodeHex(md.digest()));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
System.err.println("Error creating id");
|
log.error("Error creating id from {}", s);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves from the metadata store manager application the list of paths associated with mdstores characterized
|
||||||
|
* by he given format, layout, interpretation
|
||||||
|
* @param mdstoreManagerUrl the URL of the mdstore manager service
|
||||||
|
* @param format the mdstore format
|
||||||
|
* @param layout the mdstore layout
|
||||||
|
* @param interpretation the mdstore interpretation
|
||||||
|
* @param includeEmpty include Empty mdstores
|
||||||
|
* @return the set of hdfs paths
|
||||||
|
* @throws IOException in case of HTTP communication issues
|
||||||
|
*/
|
||||||
|
public static Set<String> mdstorePaths(final String mdstoreManagerUrl,
|
||||||
|
final String format,
|
||||||
|
final String layout,
|
||||||
|
final String interpretation,
|
||||||
|
boolean includeEmpty) throws IOException {
|
||||||
|
final String url = mdstoreManagerUrl + "/mdstores/";
|
||||||
|
final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
|
||||||
|
final HttpGet req = new HttpGet(url);
|
||||||
|
|
||||||
|
try (final CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
try (final CloseableHttpResponse response = client.execute(req)) {
|
||||||
|
final String json = IOUtils.toString(response.getEntity().getContent());
|
||||||
|
final MDStoreWithInfo[] mdstores = objectMapper.readValue(json, MDStoreWithInfo[].class);
|
||||||
|
return Arrays
|
||||||
|
.stream(mdstores)
|
||||||
|
.filter(md -> md.getFormat().equalsIgnoreCase(format))
|
||||||
|
.filter(md -> md.getLayout().equalsIgnoreCase(layout))
|
||||||
|
.filter(md -> md.getInterpretation().equalsIgnoreCase(interpretation))
|
||||||
|
.filter(md -> StringUtils.isNotBlank(md.getHdfsPath()))
|
||||||
|
.filter(md -> StringUtils.isNotBlank(md.getCurrentVersion()))
|
||||||
|
.filter(md -> includeEmpty || md.getSize() > 0)
|
||||||
|
.map(md -> md.getHdfsPath() + "/" + md.getCurrentVersion() + "/store")
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static String generateIdentifier(final String originalId, final String nsPrefix) {
|
public static String generateIdentifier(final String originalId, final String nsPrefix) {
|
||||||
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressString(final String input) {
|
public static String generateUnresolvedIdentifier(final String pid, final String pidType) {
|
||||||
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
||||||
Base64OutputStream b64os = new Base64OutputStream(out)) {
|
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
|
|
||||||
gzip.write(input.getBytes(StandardCharsets.UTF_8));
|
|
||||||
gzip.close();
|
|
||||||
return out.toString();
|
|
||||||
} catch (Throwable e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String decompressString(final String input) {
|
final String cleanedPid = CleaningFunctions.normalizePidValue(pidType, pid);
|
||||||
byte[] byteArray = Base64.decodeBase64(input.getBytes());
|
|
||||||
int len;
|
return String.format("unresolved::%s::%s", cleanedPid, pidType.toLowerCase().trim());
|
||||||
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
|
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
|
|
||||||
byte[] buffer = new byte[1024];
|
|
||||||
while ((len = gis.read(buffer)) != -1) {
|
|
||||||
bos.write(buffer, 0, len);
|
|
||||||
}
|
|
||||||
return bos.toString();
|
|
||||||
} catch (Exception e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getJPathString(final String jsonPath, final String json) {
|
public static String getJPathString(final String jsonPath, final String json) {
|
||||||
|
@ -79,4 +116,72 @@ public class DHPUtils {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final ObjectMapper MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
public static void writeHdfsFile(final Configuration conf, final String content, final String path)
|
||||||
|
throws IOException {
|
||||||
|
|
||||||
|
log.info("writing file {}, size {}", path, content.length());
|
||||||
|
try (FileSystem fs = FileSystem.get(conf);
|
||||||
|
BufferedOutputStream os = new BufferedOutputStream(fs.create(new Path(path)))) {
|
||||||
|
os.write(content.getBytes(StandardCharsets.UTF_8));
|
||||||
|
os.flush();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String readHdfsFile(Configuration conf, String path) throws IOException {
|
||||||
|
log.info("reading file {}", path);
|
||||||
|
|
||||||
|
try (FileSystem fs = FileSystem.get(conf)) {
|
||||||
|
final Path p = new Path(path);
|
||||||
|
if (!fs.exists(p)) {
|
||||||
|
throw new FileNotFoundException(path);
|
||||||
|
}
|
||||||
|
return IOUtils.toString(fs.open(p));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T> T readHdfsFileAs(Configuration conf, String path, Class<T> clazz) throws IOException {
|
||||||
|
return MAPPER.readValue(readHdfsFile(conf, path), clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <T> void saveDataset(final Dataset<T> mdstore, final String targetPath) {
|
||||||
|
log.info("saving dataset in: {}", targetPath);
|
||||||
|
mdstore
|
||||||
|
.write()
|
||||||
|
.mode(SaveMode.Overwrite)
|
||||||
|
.format("parquet")
|
||||||
|
.save(targetPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Configuration getHadoopConfiguration(String nameNode) {
|
||||||
|
// ====== Init HDFS File System Object
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
// Set FileSystem URI
|
||||||
|
conf.set("fs.defaultFS", nameNode);
|
||||||
|
// Because of Maven
|
||||||
|
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
|
||||||
|
conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
|
||||||
|
|
||||||
|
System.setProperty("hadoop.home.dir", "/");
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void populateOOZIEEnv(final Map<String, String> report) throws IOException {
|
||||||
|
File file = new File(System.getProperty("oozie.action.output.properties"));
|
||||||
|
Properties props = new Properties();
|
||||||
|
report.forEach((k, v) -> props.setProperty(k, v));
|
||||||
|
|
||||||
|
try (OutputStream os = new FileOutputStream(file)) {
|
||||||
|
props.store(os, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void populateOOZIEEnv(final String paramName, String value) throws IOException {
|
||||||
|
Map<String, String> report = Maps.newHashMap();
|
||||||
|
report.put(paramName, value);
|
||||||
|
|
||||||
|
populateOOZIEEnv(report);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,8 +15,11 @@ public class ISLookupClientFactory {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
|
private static final Logger log = LoggerFactory.getLogger(ISLookupClientFactory.class);
|
||||||
|
|
||||||
private static int requestTimeout = 60000 * 10;
|
private static final int requestTimeout = 60000 * 10;
|
||||||
private static int connectTimeout = 60000 * 10;
|
private static final int connectTimeout = 60000 * 10;
|
||||||
|
|
||||||
|
private ISLookupClientFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
||||||
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
||||||
|
@ -24,7 +27,7 @@ public class ISLookupClientFactory {
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
|
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
|
||||||
log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint));
|
log.info("creating {} stub from {}", clazz.getName(), endpoint);
|
||||||
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
||||||
jaxWsProxyFactory.setServiceClass(clazz);
|
jaxWsProxyFactory.setServiceClass(clazz);
|
||||||
jaxWsProxyFactory.setAddress(endpoint);
|
jaxWsProxyFactory.setAddress(endpoint);
|
||||||
|
@ -38,12 +41,10 @@ public class ISLookupClientFactory {
|
||||||
|
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
String
|
"setting connectTimeout to {}, requestTimeout to {} for service {}",
|
||||||
.format(
|
connectTimeout,
|
||||||
"setting connectTimeout to %s, requestTimeout to %s for service %s",
|
requestTimeout,
|
||||||
connectTimeout,
|
clazz.getCanonicalName());
|
||||||
requestTimeout,
|
|
||||||
clazz.getCanonicalName()));
|
|
||||||
|
|
||||||
policy.setConnectionTimeout(connectTimeout);
|
policy.setConnectionTimeout(connectTimeout);
|
||||||
policy.setReceiveTimeout(requestTimeout);
|
policy.setReceiveTimeout(requestTimeout);
|
||||||
|
|
|
@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
|
||||||
|
|
||||||
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
||||||
|
|
||||||
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
||||||
|
|
||||||
public abstract String getName();
|
public abstract String getName();
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||||
if (arguments == null | arguments.length == 0) {
|
if (arguments == null || arguments.length == 0) {
|
||||||
return new StringValue("");
|
return new StringValue("");
|
||||||
}
|
}
|
||||||
final Item item = arguments[0].head();
|
final Item item = arguments[0].head();
|
||||||
|
@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
for (String format : dateFormats) {
|
for (String format : dateFormats) {
|
||||||
try {
|
try {
|
||||||
c.setTime(new SimpleDateFormat(format).parse(s));
|
c.setTime(new SimpleDateFormat(format).parse(s));
|
||||||
String year = String.valueOf(c.get(Calendar.YEAR));
|
return String.valueOf(c.get(Calendar.YEAR));
|
||||||
return year;
|
|
||||||
} catch (ParseException e) {
|
} catch (ParseException e) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||||
if (arguments == null | arguments.length == 0) {
|
if (arguments == null || arguments.length == 0) {
|
||||||
return new StringValue(BLANK);
|
return new StringValue(BLANK);
|
||||||
}
|
}
|
||||||
String s = arguments[0].head().getStringValue();
|
String s = arguments[0].head().getStringValue();
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.utils.saxon;
|
package eu.dnetlib.dhp.utils.saxon;
|
||||||
|
|
||||||
|
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import net.sf.saxon.expr.XPathContext;
|
import net.sf.saxon.expr.XPathContext;
|
||||||
|
@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
|
||||||
final String s1 = getValue(arguments[0]);
|
final String s1 = getValue(arguments[0]);
|
||||||
final String s2 = getValue(arguments[1]);
|
final String s2 = getValue(arguments[1]);
|
||||||
|
|
||||||
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
|
final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
|
||||||
|
return new StringValue(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getValue(final Sequence arg) throws XPathException {
|
private String getValue(final Sequence arg) throws XPathException {
|
||||||
|
|
|
@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
|
||||||
|
|
||||||
public class SaxonTransformerFactory {
|
public class SaxonTransformerFactory {
|
||||||
|
|
||||||
|
private SaxonTransformerFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates the index record transformer from the given XSLT
|
* Creates the index record transformer from the given XSLT
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,76 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.message;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
public class Message {
|
|
||||||
|
|
||||||
private String workflowId;
|
|
||||||
|
|
||||||
private String jobName;
|
|
||||||
|
|
||||||
private MessageType type;
|
|
||||||
|
|
||||||
private Map<String, String> body;
|
|
||||||
|
|
||||||
public static Message fromJson(final String json) throws IOException {
|
|
||||||
final ObjectMapper jsonMapper = new ObjectMapper();
|
|
||||||
return jsonMapper.readValue(json, Message.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Message() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public Message(String workflowId, String jobName, MessageType type, Map<String, String> body) {
|
|
||||||
this.workflowId = workflowId;
|
|
||||||
this.jobName = jobName;
|
|
||||||
this.type = type;
|
|
||||||
this.body = body;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getWorkflowId() {
|
|
||||||
return workflowId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setWorkflowId(String workflowId) {
|
|
||||||
this.workflowId = workflowId;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getJobName() {
|
|
||||||
return jobName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setJobName(String jobName) {
|
|
||||||
this.jobName = jobName;
|
|
||||||
}
|
|
||||||
|
|
||||||
public MessageType getType() {
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setType(MessageType type) {
|
|
||||||
this.type = type;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, String> getBody() {
|
|
||||||
return body;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setBody(Map<String, String> body) {
|
|
||||||
this.body = body;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
final ObjectMapper jsonMapper = new ObjectMapper();
|
|
||||||
try {
|
|
||||||
return jsonMapper.writeValueAsString(this);
|
|
||||||
} catch (JsonProcessingException e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,47 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.message;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
|
||||||
|
|
||||||
import com.rabbitmq.client.AMQP;
|
|
||||||
import com.rabbitmq.client.Channel;
|
|
||||||
import com.rabbitmq.client.DefaultConsumer;
|
|
||||||
import com.rabbitmq.client.Envelope;
|
|
||||||
|
|
||||||
public class MessageConsumer extends DefaultConsumer {
|
|
||||||
|
|
||||||
final LinkedBlockingQueue<Message> queueMessages;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructs a new instance and records its association to the passed-in channel.
|
|
||||||
*
|
|
||||||
* @param channel the channel to which this consumer is attached
|
|
||||||
* @param queueMessages
|
|
||||||
*/
|
|
||||||
public MessageConsumer(Channel channel, LinkedBlockingQueue<Message> queueMessages) {
|
|
||||||
super(channel);
|
|
||||||
this.queueMessages = queueMessages;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void handleDelivery(
|
|
||||||
String consumerTag, Envelope envelope, AMQP.BasicProperties properties, byte[] body)
|
|
||||||
throws IOException {
|
|
||||||
final String json = new String(body, StandardCharsets.UTF_8);
|
|
||||||
Message message = Message.fromJson(json);
|
|
||||||
try {
|
|
||||||
this.queueMessages.put(message);
|
|
||||||
System.out.println("Receiving Message " + message);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
if (message.getType() == MessageType.REPORT)
|
|
||||||
throw new RuntimeException("Error on sending message");
|
|
||||||
else {
|
|
||||||
// TODO LOGGING EXCEPTION
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
getChannel().basicAck(envelope.getDeliveryTag(), false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,136 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.message;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
|
||||||
import java.util.concurrent.TimeoutException;
|
|
||||||
|
|
||||||
import com.rabbitmq.client.Channel;
|
|
||||||
import com.rabbitmq.client.Connection;
|
|
||||||
import com.rabbitmq.client.ConnectionFactory;
|
|
||||||
|
|
||||||
public class MessageManager {
|
|
||||||
|
|
||||||
private final String messageHost;
|
|
||||||
|
|
||||||
private final String username;
|
|
||||||
|
|
||||||
private final String password;
|
|
||||||
|
|
||||||
private Connection connection;
|
|
||||||
|
|
||||||
private final Map<String, Channel> channels = new HashMap<>();
|
|
||||||
|
|
||||||
private boolean durable;
|
|
||||||
|
|
||||||
private boolean autodelete;
|
|
||||||
|
|
||||||
private final LinkedBlockingQueue<Message> queueMessages;
|
|
||||||
|
|
||||||
public MessageManager(
|
|
||||||
String messageHost,
|
|
||||||
String username,
|
|
||||||
String password,
|
|
||||||
final LinkedBlockingQueue<Message> queueMessages) {
|
|
||||||
this.queueMessages = queueMessages;
|
|
||||||
this.messageHost = messageHost;
|
|
||||||
this.username = username;
|
|
||||||
this.password = password;
|
|
||||||
}
|
|
||||||
|
|
||||||
public MessageManager(
|
|
||||||
String messageHost,
|
|
||||||
String username,
|
|
||||||
String password,
|
|
||||||
boolean durable,
|
|
||||||
boolean autodelete,
|
|
||||||
final LinkedBlockingQueue<Message> queueMessages) {
|
|
||||||
this.queueMessages = queueMessages;
|
|
||||||
this.messageHost = messageHost;
|
|
||||||
this.username = username;
|
|
||||||
this.password = password;
|
|
||||||
|
|
||||||
this.durable = durable;
|
|
||||||
this.autodelete = autodelete;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Connection createConnection() throws IOException, TimeoutException {
|
|
||||||
ConnectionFactory factory = new ConnectionFactory();
|
|
||||||
factory.setHost(this.messageHost);
|
|
||||||
factory.setUsername(this.username);
|
|
||||||
factory.setPassword(this.password);
|
|
||||||
return factory.newConnection();
|
|
||||||
}
|
|
||||||
|
|
||||||
private Channel createChannel(
|
|
||||||
final Connection connection,
|
|
||||||
final String queueName,
|
|
||||||
final boolean durable,
|
|
||||||
final boolean autodelete)
|
|
||||||
throws Exception {
|
|
||||||
Map<String, Object> args = new HashMap<>();
|
|
||||||
args.put("x-message-ttl", 10000);
|
|
||||||
Channel channel = connection.createChannel();
|
|
||||||
channel.queueDeclare(queueName, durable, false, this.autodelete, args);
|
|
||||||
return channel;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Channel getOrCreateChannel(final String queueName, boolean durable, boolean autodelete)
|
|
||||||
throws Exception {
|
|
||||||
if (channels.containsKey(queueName)) {
|
|
||||||
return channels.get(queueName);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.connection == null) {
|
|
||||||
this.connection = createConnection();
|
|
||||||
}
|
|
||||||
channels.put(queueName, createChannel(this.connection, queueName, durable, autodelete));
|
|
||||||
return channels.get(queueName);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() throws IOException {
|
|
||||||
channels
|
|
||||||
.values()
|
|
||||||
.forEach(
|
|
||||||
ch -> {
|
|
||||||
try {
|
|
||||||
ch.close();
|
|
||||||
} catch (Exception e) {
|
|
||||||
// TODO LOG
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
this.connection.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean sendMessage(final Message message, String queueName) throws Exception {
|
|
||||||
try {
|
|
||||||
Channel channel = getOrCreateChannel(queueName, this.durable, this.autodelete);
|
|
||||||
channel.basicPublish("", queueName, null, message.toString().getBytes());
|
|
||||||
return true;
|
|
||||||
} catch (Throwable e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean sendMessage(
|
|
||||||
final Message message, String queueName, boolean durable_var, boolean autodelete_var)
|
|
||||||
throws Exception {
|
|
||||||
try {
|
|
||||||
Channel channel = getOrCreateChannel(queueName, durable_var, autodelete_var);
|
|
||||||
channel.basicPublish("", queueName, null, message.toString().getBytes());
|
|
||||||
return true;
|
|
||||||
} catch (Throwable e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void startConsumingMessage(
|
|
||||||
final String queueName, final boolean durable, final boolean autodelete) throws Exception {
|
|
||||||
|
|
||||||
Channel channel = createChannel(createConnection(), queueName, durable, autodelete);
|
|
||||||
channel.basicConsume(queueName, false, new MessageConsumer(channel, queueMessages));
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,6 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.message;
|
|
||||||
|
|
||||||
public enum MessageType {
|
|
||||||
ONGOING, REPORT
|
|
||||||
}
|
|
File diff suppressed because one or more lines are too long
|
@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class ArgumentApplicationParserTest {
|
class ArgumentApplicationParserTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseParameter() throws Exception {
|
void testParseParameter() throws Exception {
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
||||||
|
|
|
@ -21,13 +21,13 @@ public class HdfsSupportTest {
|
||||||
class Remove {
|
class Remove {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
||||||
// when
|
// when
|
||||||
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ public class HdfsSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
||||||
// given
|
// given
|
||||||
Path file = Files.createTempFile(tempDir, "p", "s");
|
Path file = Files.createTempFile(tempDir, "p", "s");
|
||||||
|
|
||||||
|
@ -52,13 +52,13 @@ public class HdfsSupportTest {
|
||||||
class ListFiles {
|
class ListFiles {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
||||||
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class PacePersonTest {
|
class PacePersonTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pacePersonTest1() {
|
void pacePersonTest1() {
|
||||||
|
|
||||||
PacePerson p = new PacePerson("Artini, Michele", false);
|
PacePerson p = new PacePerson("Artini, Michele", false);
|
||||||
assertEquals("Artini", p.getSurnameString());
|
assertEquals("Artini", p.getSurnameString());
|
||||||
|
@ -17,7 +17,7 @@ public class PacePersonTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pacePersonTest2() {
|
void pacePersonTest2() {
|
||||||
PacePerson p = new PacePerson("Michele G. Artini", false);
|
PacePerson p = new PacePerson("Michele G. Artini", false);
|
||||||
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
|
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
|
||||||
assertEquals("Michele G", p.getNameString());
|
assertEquals("Michele G", p.getNameString());
|
||||||
|
|
|
@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
|
||||||
class RunWithSparkSession {
|
class RunWithSparkSession {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
@SuppressWarnings("unchecked")
|
||||||
|
void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
@SuppressWarnings("unchecked")
|
||||||
|
void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
|
|
@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@Disabled
|
@Disabled
|
||||||
public class ZenodoAPIClientTest {
|
class ZenodoAPIClientTest {
|
||||||
|
|
||||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||||
private final String ACCESS_TOKEN = "";
|
private final String ACCESS_TOKEN = "";
|
||||||
|
@ -22,7 +22,7 @@ public class ZenodoAPIClientTest {
|
||||||
private final String depositionId = "674915";
|
private final String depositionId = "674915";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
||||||
|
@ -44,7 +44,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewDeposition() throws IOException {
|
void testNewDeposition() throws IOException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
@ -67,7 +67,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
@ -87,7 +87,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
|
|
@ -1,16 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.model.mdstore;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
public class MetadataRecordTest {
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void getTimestamp() {
|
|
||||||
|
|
||||||
MetadataRecord r = new MetadataRecord();
|
|
||||||
assertTrue(r.getDateOfCollection() > 0);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class AuthorMergerTest {
|
class AuthorMergerTest {
|
||||||
|
|
||||||
private String publicationsBasePath;
|
private String publicationsBasePath;
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ public class AuthorMergerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void mergeTest() { // used in the dedup: threshold set to 0.95
|
void mergeTest() { // used in the dedup: threshold set to 0.95
|
||||||
|
|
||||||
for (List<Author> authors1 : authors) {
|
for (List<Author> authors1 : authors) {
|
||||||
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
||||||
|
|
|
@ -0,0 +1,197 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.schema.oaf.utils;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.DeserializationFeature;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.KeyValue;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
|
import me.xuender.unidecode.Unidecode;
|
||||||
|
|
||||||
|
class OafMapperUtilsTest {
|
||||||
|
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnidecode() {
|
||||||
|
|
||||||
|
assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ"));
|
||||||
|
assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛"));
|
||||||
|
assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼"));
|
||||||
|
assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい"));
|
||||||
|
assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի"));
|
||||||
|
assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики"));
|
||||||
|
assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ"));
|
||||||
|
assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης"));
|
||||||
|
assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية"));
|
||||||
|
assertEquals("abc def ghi", Unidecode.decode("abc def ghi"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testDateValidation() {
|
||||||
|
|
||||||
|
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
||||||
|
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
||||||
|
assertTrue(GraphCleaningFunctions.doCleanDate(" 2016-04-05").isPresent());
|
||||||
|
|
||||||
|
assertEquals("2016-04-05", GraphCleaningFunctions.doCleanDate("2016 Apr 05").get());
|
||||||
|
|
||||||
|
assertEquals("2009-05-08", GraphCleaningFunctions.doCleanDate("May 8, 2009 5:57:51 PM").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, 1970").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct 7, '70").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 1970").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("oct. 7, 70").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 2006").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 2 15:04:05 MST 2006").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon Jan 02 15:04:05 -0700 2006").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Monday, 02-Jan-06 15:04:05 MST").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 MST").get());
|
||||||
|
assertEquals("2017-07-11", GraphCleaningFunctions.doCleanDate("Tue, 11 Jul 2017 16:28:13 +0200 (CEST)").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("Mon, 02 Jan 2006 15:04:05 -0700").get());
|
||||||
|
assertEquals("2018-01-04", GraphCleaningFunctions.doCleanDate("Thu, 4 Jan 2018 17:53:36 +0000").get());
|
||||||
|
assertEquals("2015-08-10", GraphCleaningFunctions.doCleanDate("Mon Aug 10 15:44:11 UTC+0100 2015").get());
|
||||||
|
assertEquals(
|
||||||
|
"2015-07-03",
|
||||||
|
GraphCleaningFunctions.doCleanDate("Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)").get());
|
||||||
|
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 10:09am").get());
|
||||||
|
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012 at 10:09am PST-08").get());
|
||||||
|
assertEquals("2012-09-17", GraphCleaningFunctions.doCleanDate("September 17, 2012, 10:10:09").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7, 1970").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("October 7th, 1970").get());
|
||||||
|
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006, 19:17").get());
|
||||||
|
assertEquals("2006-02-12", GraphCleaningFunctions.doCleanDate("12 Feb 2006 19:17").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 70").get());
|
||||||
|
assertEquals("1970-10-07", GraphCleaningFunctions.doCleanDate("7 oct 1970").get());
|
||||||
|
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("03 February 2013").get());
|
||||||
|
assertEquals("2013-07-01", GraphCleaningFunctions.doCleanDate("1 July 2013").get());
|
||||||
|
assertEquals("2013-02-03", GraphCleaningFunctions.doCleanDate("2013-Feb-03").get());
|
||||||
|
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3/31/2014").get());
|
||||||
|
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03/31/2014").get());
|
||||||
|
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08/21/71").get());
|
||||||
|
assertEquals("1971-01-08", GraphCleaningFunctions.doCleanDate("8/1/71").get());
|
||||||
|
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/2014 22:05").get());
|
||||||
|
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("04/08/2014 22:05").get());
|
||||||
|
assertEquals("2014-08-04", GraphCleaningFunctions.doCleanDate("4/8/14 22:05").get());
|
||||||
|
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("04/2/2014 03:00:51").get());
|
||||||
|
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00:00 AM").get());
|
||||||
|
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00:01 PM").get());
|
||||||
|
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 01:00 PM").get());
|
||||||
|
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 1:00 PM").get());
|
||||||
|
assertEquals("1965-08-08", GraphCleaningFunctions.doCleanDate("8/8/1965 12:00 AM").get());
|
||||||
|
assertEquals("2014-02-04", GraphCleaningFunctions.doCleanDate("4/02/2014 03:00:51").get());
|
||||||
|
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59").get());
|
||||||
|
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("03/19/2012 10:11:59.3186369").get());
|
||||||
|
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/3/31").get());
|
||||||
|
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("2014/03/31").get());
|
||||||
|
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/4/8 22:05").get());
|
||||||
|
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014/04/08 22:05").get());
|
||||||
|
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/04/2 03:00:51").get());
|
||||||
|
assertEquals("2014-04-02", GraphCleaningFunctions.doCleanDate("2014/4/02 03:00:51").get());
|
||||||
|
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59").get());
|
||||||
|
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("2012/03/19 10:11:59.3186369").get());
|
||||||
|
assertEquals("2014-04-08", GraphCleaningFunctions.doCleanDate("2014年04月08日").get());
|
||||||
|
assertEquals("2006-01-02", GraphCleaningFunctions.doCleanDate("2006-01-02T15:04:05+0000").get());
|
||||||
|
assertEquals("2009-08-13", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09-07:00").get());
|
||||||
|
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09").get());
|
||||||
|
assertEquals("2009-08-12", GraphCleaningFunctions.doCleanDate("2009-08-12T22:15:09Z").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.3186369").get());
|
||||||
|
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 17:24:37.123").get());
|
||||||
|
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43").get());
|
||||||
|
assertEquals("2013-04-01", GraphCleaningFunctions.doCleanDate("2013-04-01 22:43:22").get());
|
||||||
|
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 UTC").get());
|
||||||
|
assertEquals("2014-12-16", GraphCleaningFunctions.doCleanDate("2014-12-16 06:20:00 GMT").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 05:24:37 PM").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:43 +0800 +08").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26 13:13:44 +09:00").get());
|
||||||
|
assertEquals("2012-08-03", GraphCleaningFunctions.doCleanDate("2012-08-03 18:31:59.257000000 +0000 UTC").get());
|
||||||
|
assertEquals("2015-09-30", GraphCleaningFunctions.doCleanDate("2015-09-30 18:48:56.35272715 +0000 UTC").get());
|
||||||
|
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 GMT").get());
|
||||||
|
assertEquals("2015-02-18", GraphCleaningFunctions.doCleanDate("2015-02-18 00:12:00 +0000 UTC").get());
|
||||||
|
assertEquals(
|
||||||
|
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00 +0300 MSK m=+0.000000001").get());
|
||||||
|
assertEquals(
|
||||||
|
"2015-02-08", GraphCleaningFunctions.doCleanDate("2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001").get());
|
||||||
|
assertEquals("2017-07-19", GraphCleaningFunctions.doCleanDate("2017-07-19 03:21:51+00:00").get());
|
||||||
|
assertEquals("2014-04-26", GraphCleaningFunctions.doCleanDate("2014-04-26").get());
|
||||||
|
assertEquals("2014-04-01", GraphCleaningFunctions.doCleanDate("2014-04").get());
|
||||||
|
assertEquals("2014-01-01", GraphCleaningFunctions.doCleanDate("2014").get());
|
||||||
|
assertEquals("2014-05-11", GraphCleaningFunctions.doCleanDate("2014-05-11 08:20:13,787").get());
|
||||||
|
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("3.31.2014").get());
|
||||||
|
assertEquals("2014-03-31", GraphCleaningFunctions.doCleanDate("03.31.2014").get());
|
||||||
|
assertEquals("1971-08-21", GraphCleaningFunctions.doCleanDate("08.21.71").get());
|
||||||
|
assertEquals("2014-03-01", GraphCleaningFunctions.doCleanDate("2014.03").get());
|
||||||
|
assertEquals("2014-03-30", GraphCleaningFunctions.doCleanDate("2014.03.30").get());
|
||||||
|
assertEquals("2014-06-01", GraphCleaningFunctions.doCleanDate("20140601").get());
|
||||||
|
assertEquals("2014-07-22", GraphCleaningFunctions.doCleanDate("20140722105203").get());
|
||||||
|
assertEquals("2012-03-19", GraphCleaningFunctions.doCleanDate("1332151919").get());
|
||||||
|
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367189").get());
|
||||||
|
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222").get());
|
||||||
|
assertEquals("2013-11-12", GraphCleaningFunctions.doCleanDate("1384216367111222333").get());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testDate() {
|
||||||
|
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
|
||||||
|
assertNotNull(date);
|
||||||
|
System.out.println(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testMergePubs() throws IOException {
|
||||||
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
|
Publication p2 = read("publication_2.json", Publication.class);
|
||||||
|
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||||
|
Dataset d2 = read("dataset_2.json", Dataset.class);
|
||||||
|
|
||||||
|
assertEquals(1, p1.getCollectedfrom().size());
|
||||||
|
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
|
||||||
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
|
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
||||||
|
OafMapperUtils
|
||||||
|
.mergeResults(p1, d2)
|
||||||
|
.getResulttype()
|
||||||
|
.getClassid());
|
||||||
|
|
||||||
|
assertEquals(1, p2.getCollectedfrom().size());
|
||||||
|
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
assertEquals(1, d1.getCollectedfrom().size());
|
||||||
|
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
|
assertEquals(
|
||||||
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
|
OafMapperUtils
|
||||||
|
.mergeResults(p2, d1)
|
||||||
|
.getResulttype()
|
||||||
|
.getClassid());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||||
|
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
||||||
|
final String json = IOUtils.toString(getClass().getResourceAsStream(filename));
|
||||||
|
return OBJECT_MAPPER.readValue(json, clazz);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,51 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.message;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
|
||||||
|
|
||||||
public class MessageTest {
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void fromJsonTest() throws IOException {
|
|
||||||
Message m = new Message();
|
|
||||||
m.setWorkflowId("wId");
|
|
||||||
m.setType(MessageType.ONGOING);
|
|
||||||
m.setJobName("Collection");
|
|
||||||
Map<String, String> body = new HashMap<>();
|
|
||||||
body.put("parsedItem", "300");
|
|
||||||
body.put("ExecutionTime", "30s");
|
|
||||||
|
|
||||||
m.setBody(body);
|
|
||||||
System.out.println("m = " + m);
|
|
||||||
Message m1 = Message.fromJson(m.toString());
|
|
||||||
assertEquals(m1.getWorkflowId(), m.getWorkflowId());
|
|
||||||
assertEquals(m1.getType(), m.getType());
|
|
||||||
assertEquals(m1.getJobName(), m.getJobName());
|
|
||||||
|
|
||||||
assertNotNull(m1.getBody());
|
|
||||||
m1.getBody().keySet().forEach(it -> assertEquals(m1.getBody().get(it), m.getBody().get(it)));
|
|
||||||
assertEquals(m1.getJobName(), m.getJobName());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void toStringTest() {
|
|
||||||
final String expectedJson = "{\"workflowId\":\"wId\",\"jobName\":\"Collection\",\"type\":\"ONGOING\",\"body\":{\"ExecutionTime\":\"30s\",\"parsedItem\":\"300\"}}";
|
|
||||||
Message m = new Message();
|
|
||||||
m.setWorkflowId("wId");
|
|
||||||
m.setType(MessageType.ONGOING);
|
|
||||||
m.setJobName("Collection");
|
|
||||||
Map<String, String> body = new HashMap<>();
|
|
||||||
body.put("parsedItem", "300");
|
|
||||||
body.put("ExecutionTime", "30s");
|
|
||||||
|
|
||||||
m.setBody(body);
|
|
||||||
|
|
||||||
assertEquals(expectedJson, m.toString());
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class RelationMapperTest {
|
class RelationMapperTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLoadRels() throws Exception {
|
void testLoadRels() throws Exception {
|
||||||
|
|
||||||
RelationMapper relationMapper = RelationMapper.load();
|
RelationMapper relationMapper = RelationMapper.load();
|
||||||
relationMapper.keySet().forEach(System.out::println);
|
relationMapper.keySet().forEach(System.out::println);
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1g", "resuttype" : { "classid" : "dataset" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository B"} ]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", "value" : "Crossref"} ]}
|
|
@ -0,0 +1 @@
|
||||||
|
{"id":"50|DansKnawCris::0829b5191605bdbea36d6502b8c1ce1f", "resuttype" : { "classid" : "publication" }, "pid":[{"qualifier":{"classid":"doi"},"value":"10.1016/j.cmet.2011.03.013"},{"qualifier":{"classid":"urn"},"value":"urn:nbn:nl:ui:29-f3ed5f9e-edf6-457e-8848-61b58a4075e2"},{"qualifier":{"classid":"scp-number"},"value":"79953761260"},{"qualifier":{"classid":"pmc"},"value":"21459329"}], "collectedfrom" : [ { "key" : "10|openaire____::081b82f96300b6a6e3d282bad31cb6e3", "value" : "Repository A"} ]}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -51,16 +51,6 @@
|
||||||
<artifactId>hadoop-distcp</artifactId>
|
<artifactId>hadoop-distcp</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>eu.dnetlib</groupId>
|
|
||||||
<artifactId>dnet-openaire-data-protos</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
|
||||||
<artifactId>dhp-schemas</artifactId>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>eu.dnetlib</groupId>
|
<groupId>eu.dnetlib</groupId>
|
||||||
<artifactId>dnet-actionmanager-api</artifactId>
|
<artifactId>dnet-actionmanager-api</artifactId>
|
||||||
|
|
|
@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.tuple.Triple;
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
||||||
import eu.dnetlib.actionmanager.set.ActionManagerSet;
|
|
||||||
import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
public class ISClient implements Serializable {
|
public class ISClient implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
private static final Logger log = LoggerFactory.getLogger(ISClient.class);
|
||||||
|
|
||||||
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
||||||
|
|
||||||
private final ISLookUpService isLookup;
|
private final transient ISLookUpService isLookup;
|
||||||
|
|
||||||
public ISClient(String isLookupUrl) {
|
public ISClient(String isLookupUrl) {
|
||||||
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
@ -63,7 +60,7 @@ public class ISClient implements Serializable {
|
||||||
.map(
|
.map(
|
||||||
sets -> sets
|
sets -> sets
|
||||||
.stream()
|
.stream()
|
||||||
.map(set -> parseSetInfo(set))
|
.map(ISClient::parseSetInfo)
|
||||||
.filter(t -> ids.contains(t.getLeft()))
|
.filter(t -> ids.contains(t.getLeft()))
|
||||||
.map(t -> buildDirectory(basePath, t))
|
.map(t -> buildDirectory(basePath, t))
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
|
@ -73,15 +70,17 @@ public class ISClient implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Triple<String, String, String> parseSetInfo(String set) {
|
private static Triple<String, String, String> parseSetInfo(String set) {
|
||||||
try {
|
try {
|
||||||
Document doc = new SAXReader().read(new StringReader(set));
|
final SAXReader reader = new SAXReader();
|
||||||
|
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||||
|
Document doc = reader.read(new StringReader(set));
|
||||||
return Triple
|
return Triple
|
||||||
.of(
|
.of(
|
||||||
doc.valueOf("//SET/@id"),
|
doc.valueOf("//SET/@id"),
|
||||||
doc.valueOf("//SET/@directory"),
|
doc.valueOf("//SET/@directory"),
|
||||||
doc.valueOf("//SET/@latest"));
|
doc.valueOf("//SET/@latest"));
|
||||||
} catch (DocumentException e) {
|
} catch (DocumentException | SAXException e) {
|
||||||
throw new IllegalStateException(e);
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -99,7 +98,7 @@ public class ISClient implements Serializable {
|
||||||
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
||||||
+ propertyName
|
+ propertyName
|
||||||
+ "']/@value/string()";
|
+ "']/@value/string()";
|
||||||
log.debug("quering for service property: " + q);
|
log.debug("quering for service property: {}", q);
|
||||||
try {
|
try {
|
||||||
final List<String> value = isLookup.quickSearchProfile(q);
|
final List<String> value = isLookup.quickSearchProfile(q);
|
||||||
return Iterables.getOnlyElement(value);
|
return Iterables.getOnlyElement(value);
|
||||||
|
|
|
@ -1,69 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.migration;
|
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
|
|
||||||
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
|
|
||||||
|
|
||||||
public class LicenseComparator implements Comparator<Qualifier> {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int compare(Qualifier left, Qualifier right) {
|
|
||||||
|
|
||||||
if (left == null && right == null)
|
|
||||||
return 0;
|
|
||||||
if (left == null)
|
|
||||||
return 1;
|
|
||||||
if (right == null)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
String lClass = left.getClassid();
|
|
||||||
String rClass = right.getClassid();
|
|
||||||
|
|
||||||
if (lClass.equals(rClass))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (lClass.equals("OPEN SOURCE"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("OPEN SOURCE"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("OPEN"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("OPEN"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("6MONTHS"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("6MONTHS"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("12MONTHS"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("12MONTHS"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("EMBARGO"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("EMBARGO"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("RESTRICTED"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("RESTRICTED"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("CLOSED"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("CLOSED"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
if (lClass.equals("UNKNOWN"))
|
|
||||||
return -1;
|
|
||||||
if (rClass.equals("UNKNOWN"))
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
// Else (but unlikely), lexicographical ordering will do.
|
|
||||||
return lClass.compareTo(rClass);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,196 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.migration;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.tools.DistCp;
|
|
||||||
import org.apache.hadoop.tools.DistCpOptions;
|
|
||||||
import org.apache.hadoop.util.ToolRunner;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.google.common.base.Splitter;
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
|
|
||||||
public class MigrateActionSet {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(MigrateActionSet.class);
|
|
||||||
|
|
||||||
private static final String SEPARATOR = "/";
|
|
||||||
private static final String TARGET_PATHS = "target_paths";
|
|
||||||
private static final String RAWSET_PREFIX = "rawset_";
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
||||||
IOUtils
|
|
||||||
.toString(
|
|
||||||
MigrateActionSet.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/actionmanager/migration/migrate_actionsets_parameters.json")));
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
new MigrateActionSet().run(parser);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void run(ArgumentApplicationParser parser) throws Exception {
|
|
||||||
|
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
|
||||||
final String sourceNN = parser.get("sourceNameNode");
|
|
||||||
final String targetNN = parser.get("targetNameNode");
|
|
||||||
final String workDir = parser.get("workingDirectory");
|
|
||||||
final Integer distcp_num_maps = Integer.parseInt(parser.get("distcp_num_maps"));
|
|
||||||
|
|
||||||
final String distcp_memory_mb = parser.get("distcp_memory_mb");
|
|
||||||
final String distcp_task_timeout = parser.get("distcp_task_timeout");
|
|
||||||
|
|
||||||
final String transform_only_s = parser.get("transform_only");
|
|
||||||
|
|
||||||
log.info("transform only param: {}", transform_only_s);
|
|
||||||
|
|
||||||
final Boolean transformOnly = Boolean.valueOf(parser.get("transform_only"));
|
|
||||||
|
|
||||||
log.info("transform only: {}", transformOnly);
|
|
||||||
|
|
||||||
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
|
||||||
|
|
||||||
Configuration conf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
|
|
||||||
FileSystem targetFS = FileSystem.get(conf);
|
|
||||||
|
|
||||||
Configuration sourceConf = getConfiguration(distcp_task_timeout, distcp_memory_mb, distcp_num_maps);
|
|
||||||
sourceConf.set(FileSystem.FS_DEFAULT_NAME_KEY, sourceNN);
|
|
||||||
FileSystem sourceFS = FileSystem.get(sourceConf);
|
|
||||||
|
|
||||||
Properties props = new Properties();
|
|
||||||
|
|
||||||
List<Path> targetPaths = new ArrayList<>();
|
|
||||||
|
|
||||||
final List<Path> sourcePaths = getSourcePaths(sourceNN, isLookUp);
|
|
||||||
log
|
|
||||||
.info(
|
|
||||||
"paths to process:\n{}", sourcePaths
|
|
||||||
.stream()
|
|
||||||
.map(p -> p.toString())
|
|
||||||
.collect(Collectors.joining("\n")));
|
|
||||||
|
|
||||||
for (Path source : sourcePaths) {
|
|
||||||
|
|
||||||
if (!sourceFS.exists(source)) {
|
|
||||||
log.warn("skipping unexisting path: {}", source);
|
|
||||||
} else {
|
|
||||||
|
|
||||||
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(source.toUri().getPath()));
|
|
||||||
|
|
||||||
final String rawSet = pathQ.pollLast();
|
|
||||||
log.info("got RAWSET: {}", rawSet);
|
|
||||||
|
|
||||||
if (StringUtils.isNotBlank(rawSet) && rawSet.startsWith(RAWSET_PREFIX)) {
|
|
||||||
|
|
||||||
final String actionSetDirectory = pathQ.pollLast();
|
|
||||||
|
|
||||||
final Path targetPath = new Path(
|
|
||||||
targetNN + workDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawSet);
|
|
||||||
|
|
||||||
log.info("using TARGET PATH: {}", targetPath);
|
|
||||||
|
|
||||||
if (!transformOnly) {
|
|
||||||
if (targetFS.exists(targetPath)) {
|
|
||||||
targetFS.delete(targetPath, true);
|
|
||||||
}
|
|
||||||
runDistcp(
|
|
||||||
distcp_num_maps, distcp_memory_mb, distcp_task_timeout, conf, source, targetPath);
|
|
||||||
}
|
|
||||||
|
|
||||||
targetPaths.add(targetPath);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final String targetPathsCsv = targetPaths.stream().map(p -> p.toString()).collect(Collectors.joining(","));
|
|
||||||
props.setProperty(TARGET_PATHS, targetPathsCsv);
|
|
||||||
File file = new File(System.getProperty("oozie.action.output.properties"));
|
|
||||||
|
|
||||||
try (OutputStream os = new FileOutputStream(file)) {
|
|
||||||
props.store(os, "");
|
|
||||||
}
|
|
||||||
System.out.println(file.getAbsolutePath());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void runDistcp(
|
|
||||||
Integer distcp_num_maps,
|
|
||||||
String distcp_memory_mb,
|
|
||||||
String distcp_task_timeout,
|
|
||||||
Configuration conf,
|
|
||||||
Path source,
|
|
||||||
Path targetPath)
|
|
||||||
throws Exception {
|
|
||||||
|
|
||||||
final DistCpOptions op = new DistCpOptions(source, targetPath);
|
|
||||||
op.setMaxMaps(distcp_num_maps);
|
|
||||||
op.preserve(DistCpOptions.FileAttribute.BLOCKSIZE);
|
|
||||||
op.preserve(DistCpOptions.FileAttribute.REPLICATION);
|
|
||||||
op.preserve(DistCpOptions.FileAttribute.CHECKSUMTYPE);
|
|
||||||
|
|
||||||
int res = ToolRunner
|
|
||||||
.run(
|
|
||||||
new DistCp(conf, op),
|
|
||||||
new String[] {
|
|
||||||
"-Dmapred.task.timeout=" + distcp_task_timeout,
|
|
||||||
"-Dmapreduce.map.memory.mb=" + distcp_memory_mb,
|
|
||||||
"-pb",
|
|
||||||
"-m " + distcp_num_maps,
|
|
||||||
source.toString(),
|
|
||||||
targetPath.toString()
|
|
||||||
});
|
|
||||||
|
|
||||||
if (res != 0) {
|
|
||||||
throw new RuntimeException(String.format("distcp exited with code %s", res));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Configuration getConfiguration(
|
|
||||||
String distcp_task_timeout, String distcp_memory_mb, Integer distcp_num_maps) {
|
|
||||||
final Configuration conf = new Configuration();
|
|
||||||
conf.set("dfs.webhdfs.socket.connect-timeout", distcp_task_timeout);
|
|
||||||
conf.set("dfs.webhdfs.socket.read-timeout", distcp_task_timeout);
|
|
||||||
conf.set("dfs.http.client.retry.policy.enabled", "true");
|
|
||||||
conf.set("mapred.task.timeout", distcp_task_timeout);
|
|
||||||
conf.set("mapreduce.map.memory.mb", distcp_memory_mb);
|
|
||||||
conf.set("mapred.map.tasks", String.valueOf(distcp_num_maps));
|
|
||||||
return conf;
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<Path> getSourcePaths(String sourceNN, ISLookUpService isLookUp)
|
|
||||||
throws ISLookUpException {
|
|
||||||
String XQUERY = "distinct-values(\n"
|
|
||||||
+ "let $basePath := collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()\n"
|
|
||||||
+ "for $x in collection('/db/DRIVER/ActionManagerSetDSResources/ActionManagerSetDSResourceType') \n"
|
|
||||||
+ "let $setDir := $x//SET/@directory/string()\n"
|
|
||||||
+ "let $rawSet := $x//RAW_SETS/LATEST/@id/string()\n"
|
|
||||||
+ "return concat($basePath, '/', $setDir, '/', $rawSet))";
|
|
||||||
|
|
||||||
log.info(String.format("running xquery:\n%s", XQUERY));
|
|
||||||
return isLookUp
|
|
||||||
.quickSearchProfile(XQUERY)
|
|
||||||
.stream()
|
|
||||||
.map(p -> sourceNN + p)
|
|
||||||
.map(Path::new)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,710 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.migration;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.googlecode.protobuf.format.JsonFormat;
|
|
||||||
|
|
||||||
import eu.dnetlib.data.proto.*;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
|
|
||||||
public class ProtoConverter implements Serializable {
|
|
||||||
|
|
||||||
public static Oaf convert(OafProtos.Oaf oaf) {
|
|
||||||
try {
|
|
||||||
switch (oaf.getKind()) {
|
|
||||||
case entity:
|
|
||||||
return convertEntity(oaf);
|
|
||||||
case relation:
|
|
||||||
return convertRelation(oaf);
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("invalid kind " + oaf.getKind());
|
|
||||||
}
|
|
||||||
} catch (Throwable e) {
|
|
||||||
throw new RuntimeException("error on getting " + JsonFormat.printToString(oaf), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Relation convertRelation(OafProtos.Oaf oaf) {
|
|
||||||
final OafProtos.OafRel r = oaf.getRel();
|
|
||||||
final Relation rel = new Relation();
|
|
||||||
rel.setDataInfo(mapDataInfo(oaf.getDataInfo()));
|
|
||||||
rel.setLastupdatetimestamp(oaf.getLastupdatetimestamp());
|
|
||||||
rel.setSource(r.getSource());
|
|
||||||
rel.setTarget(r.getTarget());
|
|
||||||
rel.setRelType(r.getRelType().toString());
|
|
||||||
rel.setSubRelType(r.getSubRelType().toString());
|
|
||||||
rel.setRelClass(r.getRelClass());
|
|
||||||
rel
|
|
||||||
.setCollectedfrom(
|
|
||||||
r.getCollectedfromCount() > 0
|
|
||||||
? r.getCollectedfromList().stream().map(kv -> mapKV(kv)).collect(Collectors.toList())
|
|
||||||
: null);
|
|
||||||
return rel;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static OafEntity convertEntity(OafProtos.Oaf oaf) {
|
|
||||||
|
|
||||||
switch (oaf.getEntity().getType()) {
|
|
||||||
case result:
|
|
||||||
final Result r = convertResult(oaf);
|
|
||||||
r.setInstance(convertInstances(oaf));
|
|
||||||
r.setExternalReference(convertExternalRefs(oaf));
|
|
||||||
return r;
|
|
||||||
case project:
|
|
||||||
return convertProject(oaf);
|
|
||||||
case datasource:
|
|
||||||
return convertDataSource(oaf);
|
|
||||||
case organization:
|
|
||||||
return convertOrganization(oaf);
|
|
||||||
default:
|
|
||||||
throw new RuntimeException("received unknown type");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<Instance> convertInstances(OafProtos.Oaf oaf) {
|
|
||||||
|
|
||||||
final ResultProtos.Result r = oaf.getEntity().getResult();
|
|
||||||
if (r.getInstanceCount() > 0) {
|
|
||||||
return r.getInstanceList().stream().map(i -> convertInstance(i)).collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
return Lists.newArrayList();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Instance convertInstance(ResultProtos.Result.Instance ri) {
|
|
||||||
final Instance i = new Instance();
|
|
||||||
i.setAccessright(mapQualifier(ri.getAccessright()));
|
|
||||||
i.setCollectedfrom(mapKV(ri.getCollectedfrom()));
|
|
||||||
i.setDateofacceptance(mapStringField(ri.getDateofacceptance()));
|
|
||||||
i.setDistributionlocation(ri.getDistributionlocation());
|
|
||||||
i.setHostedby(mapKV(ri.getHostedby()));
|
|
||||||
i.setInstancetype(mapQualifier(ri.getInstancetype()));
|
|
||||||
i.setLicense(mapStringField(ri.getLicense()));
|
|
||||||
i
|
|
||||||
.setUrl(
|
|
||||||
ri.getUrlList() != null ? ri
|
|
||||||
.getUrlList()
|
|
||||||
.stream()
|
|
||||||
.distinct()
|
|
||||||
.collect(Collectors.toCollection(ArrayList::new)) : null);
|
|
||||||
i.setRefereed(mapRefereed(ri.getRefereed()));
|
|
||||||
i.setProcessingchargeamount(mapStringField(ri.getProcessingchargeamount()));
|
|
||||||
i.setProcessingchargecurrency(mapStringField(ri.getProcessingchargecurrency()));
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Qualifier mapRefereed(FieldTypeProtos.StringField refereed) {
|
|
||||||
Qualifier q = new Qualifier();
|
|
||||||
q.setClassid(refereed.getValue());
|
|
||||||
q.setSchemename(refereed.getValue());
|
|
||||||
q.setSchemeid("dnet:review_levels");
|
|
||||||
q.setSchemename("dnet:review_levels");
|
|
||||||
return q;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static List<ExternalReference> convertExternalRefs(OafProtos.Oaf oaf) {
|
|
||||||
ResultProtos.Result r = oaf.getEntity().getResult();
|
|
||||||
if (r.getExternalReferenceCount() > 0) {
|
|
||||||
return r
|
|
||||||
.getExternalReferenceList()
|
|
||||||
.stream()
|
|
||||||
.map(e -> convertExtRef(e))
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
return Lists.newArrayList();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ExternalReference convertExtRef(ResultProtos.Result.ExternalReference e) {
|
|
||||||
ExternalReference ex = new ExternalReference();
|
|
||||||
ex.setUrl(e.getUrl());
|
|
||||||
ex.setSitename(e.getSitename());
|
|
||||||
ex.setRefidentifier(e.getRefidentifier());
|
|
||||||
ex.setQuery(e.getQuery());
|
|
||||||
ex.setQualifier(mapQualifier(e.getQualifier()));
|
|
||||||
ex.setLabel(e.getLabel());
|
|
||||||
ex.setDescription(e.getDescription());
|
|
||||||
ex.setDataInfo(ex.getDataInfo());
|
|
||||||
return ex;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Organization convertOrganization(OafProtos.Oaf oaf) {
|
|
||||||
final OrganizationProtos.Organization.Metadata m = oaf.getEntity().getOrganization().getMetadata();
|
|
||||||
final Organization org = setOaf(new Organization(), oaf);
|
|
||||||
setEntity(org, oaf);
|
|
||||||
org.setLegalshortname(mapStringField(m.getLegalshortname()));
|
|
||||||
org.setLegalname(mapStringField(m.getLegalname()));
|
|
||||||
org
|
|
||||||
.setAlternativeNames(
|
|
||||||
m
|
|
||||||
.getAlternativeNamesList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
org.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
|
||||||
org.setLogourl(mapStringField(m.getLogourl()));
|
|
||||||
org.setEclegalbody(mapStringField(m.getEclegalbody()));
|
|
||||||
org.setEclegalperson(mapStringField(m.getEclegalperson()));
|
|
||||||
org.setEcnonprofit(mapStringField(m.getEcnonprofit()));
|
|
||||||
org.setEcresearchorganization(mapStringField(m.getEcresearchorganization()));
|
|
||||||
org.setEchighereducation(mapStringField(m.getEchighereducation()));
|
|
||||||
org
|
|
||||||
.setEcinternationalorganizationeurinterests(
|
|
||||||
mapStringField(m.getEcinternationalorganizationeurinterests()));
|
|
||||||
org.setEcinternationalorganization(mapStringField(m.getEcinternationalorganization()));
|
|
||||||
org.setEcenterprise(mapStringField(m.getEcenterprise()));
|
|
||||||
org.setEcsmevalidated(mapStringField(m.getEcsmevalidated()));
|
|
||||||
org.setEcnutscode(mapStringField(m.getEcnutscode()));
|
|
||||||
org.setCountry(mapQualifier(m.getCountry()));
|
|
||||||
|
|
||||||
return org;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Datasource convertDataSource(OafProtos.Oaf oaf) {
|
|
||||||
final DatasourceProtos.Datasource.Metadata m = oaf.getEntity().getDatasource().getMetadata();
|
|
||||||
final Datasource datasource = setOaf(new Datasource(), oaf);
|
|
||||||
setEntity(datasource, oaf);
|
|
||||||
datasource
|
|
||||||
.setAccessinfopackage(
|
|
||||||
m
|
|
||||||
.getAccessinfopackageList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
datasource.setCertificates(mapStringField(m.getCertificates()));
|
|
||||||
datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
|
|
||||||
datasource.setContactemail(mapStringField(m.getContactemail()));
|
|
||||||
datasource.setDatabaseaccessrestriction(mapStringField(m.getDatabaseaccessrestriction()));
|
|
||||||
datasource.setDatabaseaccesstype(mapStringField(m.getDatabaseaccesstype()));
|
|
||||||
datasource.setDataprovider(mapBoolField(m.getDataprovider()));
|
|
||||||
datasource.setDatasourcetype(mapQualifier(m.getDatasourcetype()));
|
|
||||||
datasource.setDatauploadrestriction(mapStringField(m.getDatauploadrestriction()));
|
|
||||||
datasource.setCitationguidelineurl(mapStringField(m.getCitationguidelineurl()));
|
|
||||||
datasource.setDatauploadtype(mapStringField(m.getDatauploadtype()));
|
|
||||||
datasource.setDateofvalidation(mapStringField(m.getDateofvalidation()));
|
|
||||||
datasource.setDescription(mapStringField(m.getDescription()));
|
|
||||||
datasource.setEnglishname(mapStringField(m.getEnglishname()));
|
|
||||||
datasource.setLatitude(mapStringField(m.getLatitude()));
|
|
||||||
datasource.setLongitude(mapStringField(m.getLongitude()));
|
|
||||||
datasource.setLogourl(mapStringField(m.getLogourl()));
|
|
||||||
datasource.setMissionstatementurl(mapStringField(m.getMissionstatementurl()));
|
|
||||||
datasource.setNamespaceprefix(mapStringField(m.getNamespaceprefix()));
|
|
||||||
datasource
|
|
||||||
.setOdcontenttypes(
|
|
||||||
m
|
|
||||||
.getOdcontenttypesList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
datasource
|
|
||||||
.setOdlanguages(
|
|
||||||
m
|
|
||||||
.getOdlanguagesList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
datasource.setOdnumberofitems(mapStringField(m.getOdnumberofitems()));
|
|
||||||
datasource.setOdnumberofitemsdate(mapStringField(m.getOdnumberofitemsdate()));
|
|
||||||
datasource.setOdpolicies(mapStringField(m.getOdpolicies()));
|
|
||||||
datasource.setOfficialname(mapStringField(m.getOfficialname()));
|
|
||||||
datasource.setOpenairecompatibility(mapQualifier(m.getOpenairecompatibility()));
|
|
||||||
datasource.setPidsystems(mapStringField(m.getPidsystems()));
|
|
||||||
datasource
|
|
||||||
.setPolicies(
|
|
||||||
m.getPoliciesList().stream().map(ProtoConverter::mapKV).collect(Collectors.toList()));
|
|
||||||
datasource.setQualitymanagementkind(mapStringField(m.getQualitymanagementkind()));
|
|
||||||
datasource.setReleaseenddate(mapStringField(m.getReleaseenddate()));
|
|
||||||
datasource.setServiceprovider(mapBoolField(m.getServiceprovider()));
|
|
||||||
datasource.setReleasestartdate(mapStringField(m.getReleasestartdate()));
|
|
||||||
datasource
|
|
||||||
.setSubjects(
|
|
||||||
m
|
|
||||||
.getSubjectsList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
datasource.setVersioning(mapBoolField(m.getVersioning()));
|
|
||||||
datasource.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
|
||||||
datasource.setJournal(mapJournal(m.getJournal()));
|
|
||||||
|
|
||||||
return datasource;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Project convertProject(OafProtos.Oaf oaf) {
|
|
||||||
final ProjectProtos.Project.Metadata m = oaf.getEntity().getProject().getMetadata();
|
|
||||||
final Project project = setOaf(new Project(), oaf);
|
|
||||||
setEntity(project, oaf);
|
|
||||||
project.setAcronym(mapStringField(m.getAcronym()));
|
|
||||||
project.setCallidentifier(mapStringField(m.getCallidentifier()));
|
|
||||||
project.setCode(mapStringField(m.getCode()));
|
|
||||||
project.setContactemail(mapStringField(m.getContactemail()));
|
|
||||||
project.setContactfax(mapStringField(m.getContactfax()));
|
|
||||||
project.setContactfullname(mapStringField(m.getContactfullname()));
|
|
||||||
project.setContactphone(mapStringField(m.getContactphone()));
|
|
||||||
project.setContracttype(mapQualifier(m.getContracttype()));
|
|
||||||
project.setCurrency(mapStringField(m.getCurrency()));
|
|
||||||
project.setDuration(mapStringField(m.getDuration()));
|
|
||||||
project.setEcarticle29_3(mapStringField(m.getEcarticle293()));
|
|
||||||
project.setEcsc39(mapStringField(m.getEcsc39()));
|
|
||||||
project.setOamandatepublications(mapStringField(m.getOamandatepublications()));
|
|
||||||
project.setStartdate(mapStringField(m.getStartdate()));
|
|
||||||
project.setEnddate(mapStringField(m.getEnddate()));
|
|
||||||
project.setFundedamount(m.getFundedamount());
|
|
||||||
project.setTotalcost(m.getTotalcost());
|
|
||||||
project.setKeywords(mapStringField(m.getKeywords()));
|
|
||||||
project
|
|
||||||
.setSubjects(
|
|
||||||
m
|
|
||||||
.getSubjectsList()
|
|
||||||
.stream()
|
|
||||||
.map(sp -> mapStructuredProperty(sp))
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
project.setTitle(mapStringField(m.getTitle()));
|
|
||||||
project.setWebsiteurl(mapStringField(m.getWebsiteurl()));
|
|
||||||
project
|
|
||||||
.setFundingtree(
|
|
||||||
m.getFundingtreeList().stream().map(f -> mapStringField(f)).collect(Collectors.toList()));
|
|
||||||
project.setJsonextrainfo(mapStringField(m.getJsonextrainfo()));
|
|
||||||
project.setSummary(mapStringField(m.getSummary()));
|
|
||||||
project.setOptional1(mapStringField(m.getOptional1()));
|
|
||||||
project.setOptional2(mapStringField(m.getOptional2()));
|
|
||||||
return project;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Result convertResult(OafProtos.Oaf oaf) {
|
|
||||||
switch (oaf.getEntity().getResult().getMetadata().getResulttype().getClassid()) {
|
|
||||||
case "dataset":
|
|
||||||
return createDataset(oaf);
|
|
||||||
case "publication":
|
|
||||||
return createPublication(oaf);
|
|
||||||
case "software":
|
|
||||||
return createSoftware(oaf);
|
|
||||||
case "other":
|
|
||||||
return createORP(oaf);
|
|
||||||
default:
|
|
||||||
Result result = setOaf(new Result(), oaf);
|
|
||||||
setEntity(result, oaf);
|
|
||||||
return setResult(result, oaf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Software createSoftware(OafProtos.Oaf oaf) {
|
|
||||||
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
|
||||||
Software software = setOaf(new Software(), oaf);
|
|
||||||
setEntity(software, oaf);
|
|
||||||
setResult(software, oaf);
|
|
||||||
|
|
||||||
software
|
|
||||||
.setDocumentationUrl(
|
|
||||||
m
|
|
||||||
.getDocumentationUrlList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
software
|
|
||||||
.setLicense(
|
|
||||||
m
|
|
||||||
.getLicenseList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl()));
|
|
||||||
software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage()));
|
|
||||||
return software;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static OtherResearchProduct createORP(OafProtos.Oaf oaf) {
|
|
||||||
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
|
||||||
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
|
|
||||||
setEntity(otherResearchProducts, oaf);
|
|
||||||
setResult(otherResearchProducts, oaf);
|
|
||||||
otherResearchProducts
|
|
||||||
.setContactperson(
|
|
||||||
m
|
|
||||||
.getContactpersonList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
otherResearchProducts
|
|
||||||
.setContactgroup(
|
|
||||||
m
|
|
||||||
.getContactgroupList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
otherResearchProducts
|
|
||||||
.setTool(
|
|
||||||
m.getToolList().stream().map(ProtoConverter::mapStringField).collect(Collectors.toList()));
|
|
||||||
|
|
||||||
return otherResearchProducts;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Publication createPublication(OafProtos.Oaf oaf) {
|
|
||||||
|
|
||||||
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
|
||||||
Publication publication = setOaf(new Publication(), oaf);
|
|
||||||
setEntity(publication, oaf);
|
|
||||||
setResult(publication, oaf);
|
|
||||||
publication.setJournal(mapJournal(m.getJournal()));
|
|
||||||
return publication;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Dataset createDataset(OafProtos.Oaf oaf) {
|
|
||||||
|
|
||||||
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
|
||||||
Dataset dataset = setOaf(new Dataset(), oaf);
|
|
||||||
setEntity(dataset, oaf);
|
|
||||||
setResult(dataset, oaf);
|
|
||||||
dataset.setStoragedate(mapStringField(m.getStoragedate()));
|
|
||||||
dataset.setDevice(mapStringField(m.getDevice()));
|
|
||||||
dataset.setSize(mapStringField(m.getSize()));
|
|
||||||
dataset.setVersion(mapStringField(m.getVersion()));
|
|
||||||
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate()));
|
|
||||||
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber()));
|
|
||||||
dataset
|
|
||||||
.setGeolocation(
|
|
||||||
m
|
|
||||||
.getGeolocationList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapGeolocation)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
return dataset;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T extends Oaf> T setOaf(T oaf, OafProtos.Oaf o) {
|
|
||||||
oaf.setDataInfo(mapDataInfo(o.getDataInfo()));
|
|
||||||
oaf.setLastupdatetimestamp(o.getLastupdatetimestamp());
|
|
||||||
return oaf;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T extends OafEntity> T setEntity(T entity, OafProtos.Oaf oaf) {
|
|
||||||
// setting Entity fields
|
|
||||||
final OafProtos.OafEntity e = oaf.getEntity();
|
|
||||||
entity.setId(e.getId());
|
|
||||||
entity.setOriginalId(e.getOriginalIdList());
|
|
||||||
entity
|
|
||||||
.setCollectedfrom(
|
|
||||||
e.getCollectedfromList().stream().map(ProtoConverter::mapKV).collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setPid(
|
|
||||||
e
|
|
||||||
.getPidList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity.setDateofcollection(e.getDateofcollection());
|
|
||||||
entity.setDateoftransformation(e.getDateoftransformation());
|
|
||||||
entity
|
|
||||||
.setExtraInfo(
|
|
||||||
e
|
|
||||||
.getExtraInfoList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapExtraInfo)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static <T extends Result> T setResult(T entity, OafProtos.Oaf oaf) {
|
|
||||||
// setting Entity fields
|
|
||||||
final ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata();
|
|
||||||
entity
|
|
||||||
.setAuthor(
|
|
||||||
m.getAuthorList().stream().map(ProtoConverter::mapAuthor).collect(Collectors.toList()));
|
|
||||||
entity.setResulttype(mapQualifier(m.getResulttype()));
|
|
||||||
entity.setLanguage(mapQualifier(m.getLanguage()));
|
|
||||||
entity
|
|
||||||
.setCountry(
|
|
||||||
m
|
|
||||||
.getCountryList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapQualifierAsCountry)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setSubject(
|
|
||||||
m
|
|
||||||
.getSubjectList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setTitle(
|
|
||||||
m
|
|
||||||
.getTitleList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setRelevantdate(
|
|
||||||
m
|
|
||||||
.getRelevantdateList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStructuredProperty)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setDescription(
|
|
||||||
m
|
|
||||||
.getDescriptionList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity.setDateofacceptance(mapStringField(m.getDateofacceptance()));
|
|
||||||
entity.setPublisher(mapStringField(m.getPublisher()));
|
|
||||||
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate()));
|
|
||||||
entity
|
|
||||||
.setSource(
|
|
||||||
m
|
|
||||||
.getSourceList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setFulltext(
|
|
||||||
m
|
|
||||||
.getFulltextList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setFormat(
|
|
||||||
m
|
|
||||||
.getFormatList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setContributor(
|
|
||||||
m
|
|
||||||
.getContributorList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity.setResourcetype(mapQualifier(m.getResourcetype()));
|
|
||||||
entity
|
|
||||||
.setCoverage(
|
|
||||||
m
|
|
||||||
.getCoverageList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setContext(
|
|
||||||
m.getContextList().stream().map(ProtoConverter::mapContext).collect(Collectors.toList()));
|
|
||||||
|
|
||||||
entity.setBestaccessright(getBestAccessRights(oaf.getEntity().getResult().getInstanceList()));
|
|
||||||
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Qualifier getBestAccessRights(List<ResultProtos.Result.Instance> instanceList) {
|
|
||||||
if (instanceList != null) {
|
|
||||||
final Optional<FieldTypeProtos.Qualifier> min = instanceList
|
|
||||||
.stream()
|
|
||||||
.map(i -> i.getAccessright())
|
|
||||||
.min(new LicenseComparator());
|
|
||||||
|
|
||||||
final Qualifier rights = min.isPresent() ? mapQualifier(min.get()) : new Qualifier();
|
|
||||||
|
|
||||||
if (StringUtils.isBlank(rights.getClassid())) {
|
|
||||||
rights.setClassid(UNKNOWN);
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(rights.getClassname())
|
|
||||||
|| UNKNOWN.equalsIgnoreCase(rights.getClassname())) {
|
|
||||||
rights.setClassname(NOT_AVAILABLE);
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(rights.getSchemeid())) {
|
|
||||||
rights.setSchemeid(DNET_ACCESS_MODES);
|
|
||||||
}
|
|
||||||
if (StringUtils.isBlank(rights.getSchemename())) {
|
|
||||||
rights.setSchemename(DNET_ACCESS_MODES);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rights;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Context mapContext(ResultProtos.Result.Context context) {
|
|
||||||
if (context == null || StringUtils.isBlank(context.getId())) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
final Context entity = new Context();
|
|
||||||
entity.setId(context.getId());
|
|
||||||
entity
|
|
||||||
.setDataInfo(
|
|
||||||
context
|
|
||||||
.getDataInfoList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapDataInfo)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static KeyValue mapKV(FieldTypeProtos.KeyValue kv) {
|
|
||||||
if (kv == null || StringUtils.isBlank(kv.getKey()) & StringUtils.isBlank(kv.getValue())) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final KeyValue keyValue = new KeyValue();
|
|
||||||
keyValue.setKey(kv.getKey());
|
|
||||||
keyValue.setValue(kv.getValue());
|
|
||||||
keyValue.setDataInfo(mapDataInfo(kv.getDataInfo()));
|
|
||||||
return keyValue;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static DataInfo mapDataInfo(FieldTypeProtos.DataInfo d) {
|
|
||||||
final DataInfo dataInfo = new DataInfo();
|
|
||||||
dataInfo.setDeletedbyinference(d.getDeletedbyinference());
|
|
||||||
dataInfo.setInferenceprovenance(d.getInferenceprovenance());
|
|
||||||
dataInfo.setInferred(d.getInferred());
|
|
||||||
dataInfo.setInvisible(d.getInvisible());
|
|
||||||
dataInfo.setProvenanceaction(mapQualifier(d.getProvenanceaction()));
|
|
||||||
dataInfo.setTrust(d.getTrust());
|
|
||||||
return dataInfo;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Qualifier mapQualifier(FieldTypeProtos.Qualifier q) {
|
|
||||||
final Qualifier qualifier = new Qualifier();
|
|
||||||
qualifier.setClassid(q.getClassid());
|
|
||||||
qualifier.setClassname(q.getClassname());
|
|
||||||
qualifier.setSchemeid(q.getSchemeid());
|
|
||||||
qualifier.setSchemename(q.getSchemename());
|
|
||||||
return qualifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Country mapQualifierAsCountry(FieldTypeProtos.Qualifier q) {
|
|
||||||
final Country c = new Country();
|
|
||||||
c.setClassid(q.getClassid());
|
|
||||||
c.setClassname(q.getClassname());
|
|
||||||
c.setSchemeid(q.getSchemeid());
|
|
||||||
c.setSchemename(q.getSchemename());
|
|
||||||
c.setDataInfo(mapDataInfo(q.getDataInfo()));
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static StructuredProperty mapStructuredProperty(FieldTypeProtos.StructuredProperty sp) {
|
|
||||||
if (sp == null | StringUtils.isBlank(sp.getValue())) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final StructuredProperty structuredProperty = new StructuredProperty();
|
|
||||||
structuredProperty.setValue(sp.getValue());
|
|
||||||
structuredProperty.setQualifier(mapQualifier(sp.getQualifier()));
|
|
||||||
structuredProperty.setDataInfo(mapDataInfo(sp.getDataInfo()));
|
|
||||||
return structuredProperty;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static ExtraInfo mapExtraInfo(FieldTypeProtos.ExtraInfo extraInfo) {
|
|
||||||
final ExtraInfo entity = new ExtraInfo();
|
|
||||||
entity.setName(extraInfo.getName());
|
|
||||||
entity.setTypology(extraInfo.getTypology());
|
|
||||||
entity.setProvenance(extraInfo.getProvenance());
|
|
||||||
entity.setTrust(extraInfo.getTrust());
|
|
||||||
entity.setValue(extraInfo.getValue());
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static OAIProvenance mapOAIProvenance(FieldTypeProtos.OAIProvenance oaiProvenance) {
|
|
||||||
final OAIProvenance entity = new OAIProvenance();
|
|
||||||
entity.setOriginDescription(mapOriginalDescription(oaiProvenance.getOriginDescription()));
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static OriginDescription mapOriginalDescription(
|
|
||||||
FieldTypeProtos.OAIProvenance.OriginDescription originDescription) {
|
|
||||||
final OriginDescription originDescriptionResult = new OriginDescription();
|
|
||||||
originDescriptionResult.setHarvestDate(originDescription.getHarvestDate());
|
|
||||||
originDescriptionResult.setAltered(originDescription.getAltered());
|
|
||||||
originDescriptionResult.setBaseURL(originDescription.getBaseURL());
|
|
||||||
originDescriptionResult.setIdentifier(originDescription.getIdentifier());
|
|
||||||
originDescriptionResult.setDatestamp(originDescription.getDatestamp());
|
|
||||||
originDescriptionResult.setMetadataNamespace(originDescription.getMetadataNamespace());
|
|
||||||
return originDescriptionResult;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Field<String> mapStringField(FieldTypeProtos.StringField s) {
|
|
||||||
if (s == null || StringUtils.isBlank(s.getValue())) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final Field<String> stringField = new Field<>();
|
|
||||||
stringField.setValue(s.getValue());
|
|
||||||
stringField.setDataInfo(mapDataInfo(s.getDataInfo()));
|
|
||||||
return stringField;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Field<Boolean> mapBoolField(FieldTypeProtos.BoolField b) {
|
|
||||||
if (b == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final Field<Boolean> booleanField = new Field<>();
|
|
||||||
booleanField.setValue(b.getValue());
|
|
||||||
booleanField.setDataInfo(mapDataInfo(b.getDataInfo()));
|
|
||||||
return booleanField;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Journal mapJournal(FieldTypeProtos.Journal j) {
|
|
||||||
final Journal journal = new Journal();
|
|
||||||
journal.setConferencedate(j.getConferencedate());
|
|
||||||
journal.setConferenceplace(j.getConferenceplace());
|
|
||||||
journal.setEdition(j.getEdition());
|
|
||||||
journal.setEp(j.getEp());
|
|
||||||
journal.setIss(j.getIss());
|
|
||||||
journal.setIssnLinking(j.getIssnLinking());
|
|
||||||
journal.setIssnOnline(j.getIssnOnline());
|
|
||||||
journal.setIssnPrinted(j.getIssnPrinted());
|
|
||||||
journal.setName(j.getName());
|
|
||||||
journal.setSp(j.getSp());
|
|
||||||
journal.setVol(j.getVol());
|
|
||||||
journal.setDataInfo(mapDataInfo(j.getDataInfo()));
|
|
||||||
return journal;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Author mapAuthor(FieldTypeProtos.Author author) {
|
|
||||||
final Author entity = new Author();
|
|
||||||
entity.setFullname(author.getFullname());
|
|
||||||
entity.setName(author.getName());
|
|
||||||
entity.setSurname(author.getSurname());
|
|
||||||
entity.setRank(author.getRank());
|
|
||||||
entity
|
|
||||||
.setPid(
|
|
||||||
author
|
|
||||||
.getPidList()
|
|
||||||
.stream()
|
|
||||||
.map(
|
|
||||||
kv -> {
|
|
||||||
final StructuredProperty sp = new StructuredProperty();
|
|
||||||
sp.setValue(kv.getValue());
|
|
||||||
final Qualifier q = new Qualifier();
|
|
||||||
q.setClassid(kv.getKey());
|
|
||||||
q.setClassname(kv.getKey());
|
|
||||||
sp.setQualifier(q);
|
|
||||||
return sp;
|
|
||||||
})
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
entity
|
|
||||||
.setAffiliation(
|
|
||||||
author
|
|
||||||
.getAffiliationList()
|
|
||||||
.stream()
|
|
||||||
.map(ProtoConverter::mapStringField)
|
|
||||||
.collect(Collectors.toList()));
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GeoLocation mapGeolocation(ResultProtos.Result.GeoLocation geoLocation) {
|
|
||||||
final GeoLocation entity = new GeoLocation();
|
|
||||||
entity.setPoint(geoLocation.getPoint());
|
|
||||||
entity.setBox(geoLocation.getBox());
|
|
||||||
entity.setPlace(geoLocation.getPlace());
|
|
||||||
return entity;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,172 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.migration;
|
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
import org.apache.hadoop.io.Text;
|
|
||||||
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
|
|
||||||
import org.apache.spark.SparkConf;
|
|
||||||
import org.apache.spark.api.java.JavaSparkContext;
|
|
||||||
import org.apache.spark.sql.SparkSession;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
import com.google.common.base.Splitter;
|
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.protobuf.InvalidProtocolBufferException;
|
|
||||||
|
|
||||||
import eu.dnetlib.data.proto.OafProtos;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.*;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
public class TransformActions implements Serializable {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(TransformActions.class);
|
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
private static final String SEPARATOR = "/";
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
|
||||||
IOUtils
|
|
||||||
.toString(
|
|
||||||
MigrateActionSet.class
|
|
||||||
.getResourceAsStream(
|
|
||||||
"/eu/dnetlib/dhp/actionmanager/migration/transform_actionsets_parameters.json")));
|
|
||||||
parser.parseArgument(args);
|
|
||||||
|
|
||||||
Boolean isSparkSessionManaged = Optional
|
|
||||||
.ofNullable(parser.get("isSparkSessionManaged"))
|
|
||||||
.map(Boolean::valueOf)
|
|
||||||
.orElse(Boolean.TRUE);
|
|
||||||
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
|
|
||||||
|
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
|
||||||
log.info("isLookupUrl: {}", isLookupUrl);
|
|
||||||
|
|
||||||
final String inputPaths = parser.get("inputPaths");
|
|
||||||
|
|
||||||
if (StringUtils.isBlank(inputPaths)) {
|
|
||||||
throw new RuntimeException("empty inputPaths");
|
|
||||||
}
|
|
||||||
log.info("inputPaths: {}", inputPaths);
|
|
||||||
|
|
||||||
final String targetBaseDir = getTargetBaseDir(isLookupUrl);
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
|
||||||
|
|
||||||
runWithSparkSession(
|
|
||||||
conf, isSparkSessionManaged, spark -> transformActions(inputPaths, targetBaseDir, spark));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void transformActions(String inputPaths, String targetBaseDir, SparkSession spark)
|
|
||||||
throws IOException {
|
|
||||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
|
||||||
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
|
|
||||||
|
|
||||||
for (String sourcePath : Lists.newArrayList(Splitter.on(",").split(inputPaths))) {
|
|
||||||
|
|
||||||
LinkedList<String> pathQ = Lists.newLinkedList(Splitter.on(SEPARATOR).split(sourcePath));
|
|
||||||
|
|
||||||
final String rawset = pathQ.pollLast();
|
|
||||||
final String actionSetDirectory = pathQ.pollLast();
|
|
||||||
|
|
||||||
final Path targetDirectory = new Path(targetBaseDir + SEPARATOR + actionSetDirectory + SEPARATOR + rawset);
|
|
||||||
|
|
||||||
if (fs.exists(targetDirectory)) {
|
|
||||||
log.info("found target directory '{}", targetDirectory);
|
|
||||||
fs.delete(targetDirectory, true);
|
|
||||||
log.info("deleted target directory '{}", targetDirectory);
|
|
||||||
}
|
|
||||||
|
|
||||||
log.info("transforming actions from '{}' to '{}'", sourcePath, targetDirectory);
|
|
||||||
|
|
||||||
sc
|
|
||||||
.sequenceFile(sourcePath, Text.class, Text.class)
|
|
||||||
.map(a -> eu.dnetlib.actionmanager.actions.AtomicAction.fromJSON(a._2().toString()))
|
|
||||||
.map(TransformActions::doTransform)
|
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.mapToPair(
|
|
||||||
a -> new Tuple2<>(a.getClazz().toString(), OBJECT_MAPPER.writeValueAsString(a)))
|
|
||||||
.mapToPair(t -> new Tuple2(new Text(t._1()), new Text(t._2())))
|
|
||||||
.saveAsNewAPIHadoopFile(
|
|
||||||
targetDirectory.toString(),
|
|
||||||
Text.class,
|
|
||||||
Text.class,
|
|
||||||
SequenceFileOutputFormat.class,
|
|
||||||
sc.hadoopConfiguration());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static AtomicAction doTransform(eu.dnetlib.actionmanager.actions.AtomicAction aa)
|
|
||||||
throws InvalidProtocolBufferException {
|
|
||||||
|
|
||||||
// dedup similarity relations had empty target value, don't migrate them
|
|
||||||
if (aa.getTargetValue().length == 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
final OafProtos.Oaf proto_oaf = OafProtos.Oaf.parseFrom(aa.getTargetValue());
|
|
||||||
final Oaf oaf = ProtoConverter.convert(proto_oaf);
|
|
||||||
switch (proto_oaf.getKind()) {
|
|
||||||
case entity:
|
|
||||||
switch (proto_oaf.getEntity().getType()) {
|
|
||||||
case datasource:
|
|
||||||
return new AtomicAction<>(Datasource.class, (Datasource) oaf);
|
|
||||||
case organization:
|
|
||||||
return new AtomicAction<>(Organization.class, (Organization) oaf);
|
|
||||||
case project:
|
|
||||||
return new AtomicAction<>(Project.class, (Project) oaf);
|
|
||||||
case result:
|
|
||||||
final String resulttypeid = proto_oaf
|
|
||||||
.getEntity()
|
|
||||||
.getResult()
|
|
||||||
.getMetadata()
|
|
||||||
.getResulttype()
|
|
||||||
.getClassid();
|
|
||||||
switch (resulttypeid) {
|
|
||||||
case "publication":
|
|
||||||
return new AtomicAction<>(Publication.class, (Publication) oaf);
|
|
||||||
case "software":
|
|
||||||
return new AtomicAction<>(Software.class, (Software) oaf);
|
|
||||||
case "other":
|
|
||||||
return new AtomicAction<>(OtherResearchProduct.class, (OtherResearchProduct) oaf);
|
|
||||||
case "dataset":
|
|
||||||
return new AtomicAction<>(Dataset.class, (Dataset) oaf);
|
|
||||||
default:
|
|
||||||
// can be an update, where the resulttype is not specified
|
|
||||||
return new AtomicAction<>(Result.class, (Result) oaf);
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException(
|
|
||||||
"invalid entity type: " + proto_oaf.getEntity().getType());
|
|
||||||
}
|
|
||||||
case relation:
|
|
||||||
return new AtomicAction<>(Relation.class, (Relation) oaf);
|
|
||||||
default:
|
|
||||||
throw new IllegalArgumentException("invalid kind: " + proto_oaf.getKind());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static String getTargetBaseDir(String isLookupUrl) throws ISLookUpException {
|
|
||||||
ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
|
||||||
String XQUERY = "collection('/db/DRIVER/ServiceResources/ActionManagerServiceResourceType')//SERVICE_PROPERTIES/PROPERTY[@key = 'basePath']/@value/string()";
|
|
||||||
return isLookUp.getResourceProfileByQuery(XQUERY);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -62,6 +62,7 @@ public class MergeAndGet {
|
||||||
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
||||||
if (x.getClass().equals(y.getClass())
|
if (x.getClass().equals(y.getClass())
|
||||||
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
||||||
|
|
|
@ -5,12 +5,12 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
import static eu.dnetlib.dhp.schema.common.ModelSupport.isSubClass;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.function.BiFunction;
|
import java.util.function.BiFunction;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Dataset;
|
import org.apache.spark.sql.Dataset;
|
||||||
|
@ -68,7 +68,15 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase());
|
||||||
logger.info("strategy: {}", strategy);
|
logger.info("strategy: {}", strategy);
|
||||||
|
|
||||||
|
Boolean shouldGroupById = Optional
|
||||||
|
.ofNullable(parser.get("shouldGroupById"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(true);
|
||||||
|
logger.info("shouldGroupById: {}", shouldGroupById);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
||||||
|
|
||||||
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
||||||
|
@ -89,7 +97,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
outputGraphTablePath,
|
outputGraphTablePath,
|
||||||
strategy,
|
strategy,
|
||||||
rowClazz,
|
rowClazz,
|
||||||
actionPayloadClazz);
|
actionPayloadClazz,
|
||||||
|
shouldGroupById);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,12 +124,12 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
String outputGraphTablePath,
|
String outputGraphTablePath,
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy strategy,
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz) {
|
Class<A> actionPayloadClazz, Boolean shouldGroupById) {
|
||||||
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
Dataset<G> rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz);
|
||||||
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
Dataset<A> actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz);
|
||||||
|
|
||||||
Dataset<G> result = promoteActionPayloadForGraphTable(
|
Dataset<G> result = promoteActionPayloadForGraphTable(
|
||||||
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz)
|
rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById)
|
||||||
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
.map((MapFunction<G, G>) value -> value, Encoders.bean(rowClazz));
|
||||||
|
|
||||||
saveGraphTable(result, outputGraphTablePath);
|
saveGraphTable(result, outputGraphTablePath);
|
||||||
|
@ -145,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
return spark
|
return spark
|
||||||
.read()
|
.read()
|
||||||
.parquet(path)
|
.parquet(path)
|
||||||
.map((MapFunction<Row, String>) value -> extractPayload(value), Encoders.STRING())
|
.map((MapFunction<Row, String>) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING())
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
|
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
|
||||||
Encoders.bean(actionPayloadClazz));
|
Encoders.bean(actionPayloadClazz));
|
||||||
|
@ -153,9 +162,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
|
|
||||||
private static String extractPayload(Row value) {
|
private static String extractPayload(Row value) {
|
||||||
try {
|
try {
|
||||||
return value.<String> getAs("payload");
|
return value.getAs("payload");
|
||||||
} catch (IllegalArgumentException | ClassCastException e) {
|
} catch (IllegalArgumentException | ClassCastException e) {
|
||||||
logger.error("cannot extract payload from action: {}", value.toString());
|
logger.error("cannot extract payload from action: {}", value);
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -174,7 +183,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
Dataset<A> actionPayloadDS,
|
Dataset<A> actionPayloadDS,
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy strategy,
|
||||||
Class<G> rowClazz,
|
Class<G> rowClazz,
|
||||||
Class<A> actionPayloadClazz) {
|
Class<A> actionPayloadClazz,
|
||||||
|
Boolean shouldGroupById) {
|
||||||
logger
|
logger
|
||||||
.info(
|
.info(
|
||||||
"Promoting action payload for graph table: payload={}, table={}",
|
"Promoting action payload for graph table: payload={}, table={}",
|
||||||
|
@ -186,7 +196,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
|
SerializableSupplier<BiFunction<G, A, G>> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy);
|
||||||
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
|
SerializableSupplier<BiFunction<G, G, G>> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy);
|
||||||
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
SerializableSupplier<G> zeroFn = zeroFn(rowClazz);
|
||||||
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSource;
|
SerializableSupplier<Function<G, Boolean>> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget;
|
||||||
|
|
||||||
Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
|
Dataset<G> joinedAndMerged = PromoteActionPayloadFunctions
|
||||||
.joinGraphTableWithActionPayloadAndMerge(
|
.joinGraphTableWithActionPayloadAndMerge(
|
||||||
|
@ -198,9 +208,13 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
rowClazz,
|
rowClazz,
|
||||||
actionPayloadClazz);
|
actionPayloadClazz);
|
||||||
|
|
||||||
return PromoteActionPayloadFunctions
|
if (shouldGroupById) {
|
||||||
.groupGraphTableByIdAndMerge(
|
return PromoteActionPayloadFunctions
|
||||||
joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
|
.groupGraphTableByIdAndMerge(
|
||||||
|
joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
|
||||||
|
} else {
|
||||||
|
return joinedAndMerged;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> SerializableSupplier<T> zeroFn(Class<T> clazz) {
|
private static <T extends Oaf> SerializableSupplier<T> zeroFn(Class<T> clazz) {
|
||||||
|
@ -226,12 +240,13 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSource() {
|
private static <T extends Oaf> Function<T, Boolean> isNotZeroFnUsingIdOrSourceAndTarget() {
|
||||||
return t -> {
|
return t -> {
|
||||||
if (isSubClass(t, Relation.class)) {
|
if (isSubClass(t, Relation.class)) {
|
||||||
return Objects.nonNull(((Relation) t).getSource());
|
final Relation rel = (Relation) t;
|
||||||
|
return StringUtils.isNotBlank(rel.getSource()) && StringUtils.isNotBlank(rel.getTarget());
|
||||||
}
|
}
|
||||||
return Objects.nonNull(((OafEntity) t).getId());
|
return StringUtils.isNotBlank(((OafEntity) t).getId());
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -112,6 +112,7 @@ public class PromoteActionPayloadFunctions {
|
||||||
Class<G> rowClazz) {
|
Class<G> rowClazz) {
|
||||||
TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
|
TypedColumn<G, G> aggregator = new TableAggregator<>(zeroFn, mergeAndGetFn, isNotZeroFn, rowClazz).toColumn();
|
||||||
return rowDS
|
return rowDS
|
||||||
|
.filter((FilterFunction<G>) o -> isNotZeroFn.get().apply(o))
|
||||||
.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
|
.groupByKey((MapFunction<G, String>) x -> rowIdFn.get().apply(x), Encoders.STRING())
|
||||||
.agg(aggregator)
|
.agg(aggregator)
|
||||||
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
|
.map((MapFunction<Tuple2<String, G>, G>) Tuple2::_2, Encoders.kryo(rowClazz));
|
||||||
|
|
|
@ -1,56 +0,0 @@
|
||||||
[
|
|
||||||
{
|
|
||||||
"paramName": "issm",
|
|
||||||
"paramLongName": "isSparkSessionManaged",
|
|
||||||
"paramDescription": "when true will stop SparkSession after job execution",
|
|
||||||
"paramRequired": false
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "is",
|
|
||||||
"paramLongName": "isLookupUrl",
|
|
||||||
"paramDescription": "URL of the isLookUp Service",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "sn",
|
|
||||||
"paramLongName": "sourceNameNode",
|
|
||||||
"paramDescription": "nameNode of the source cluster",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "tn",
|
|
||||||
"paramLongName": "targetNameNode",
|
|
||||||
"paramDescription": "namoNode of the target cluster",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "w",
|
|
||||||
"paramLongName": "workingDirectory",
|
|
||||||
"paramDescription": "working directory",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "nm",
|
|
||||||
"paramLongName": "distcp_num_maps",
|
|
||||||
"paramDescription": "maximum number of map tasks used in the distcp process",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "mm",
|
|
||||||
"paramLongName": "distcp_memory_mb",
|
|
||||||
"paramDescription": "memory for distcp action copying actionsets from remote cluster",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "tt",
|
|
||||||
"paramLongName": "distcp_task_timeout",
|
|
||||||
"paramDescription": "timeout for distcp copying actions from remote cluster",
|
|
||||||
"paramRequired": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"paramName": "tr",
|
|
||||||
"paramLongName": "transform_only",
|
|
||||||
"paramDescription": "activate tranform-only mode. Only apply transformation step",
|
|
||||||
"paramRequired": true
|
|
||||||
}
|
|
||||||
]
|
|
|
@ -40,5 +40,11 @@
|
||||||
"paramLongName": "mergeAndGetStrategy",
|
"paramLongName": "mergeAndGetStrategy",
|
||||||
"paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
|
"paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET",
|
||||||
"paramRequired": true
|
"paramRequired": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"paramName": "sgid",
|
||||||
|
"paramLongName": "shouldGroupById",
|
||||||
|
"paramDescription": "indicates whether the promotion operation should group objects in the graph by id or not",
|
||||||
|
"paramRequired": false
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -24,6 +24,10 @@
|
||||||
<name>mergeAndGetStrategy</name>
|
<name>mergeAndGetStrategy</name>
|
||||||
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>shouldGroupById</name>
|
||||||
|
<description>indicates whether the promotion operation should group objects in the graph by id or not</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -111,6 +115,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/dataset</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForDatasetTable"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -162,6 +167,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/dataset</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -56,6 +56,11 @@
|
||||||
<name>mergeAndGetStrategy</name>
|
<name>mergeAndGetStrategy</name>
|
||||||
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>shouldGroupById</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>indicates whether the promotion operation should group objects in the graph by id or not</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
|
|
@ -24,6 +24,10 @@
|
||||||
<name>mergeAndGetStrategy</name>
|
<name>mergeAndGetStrategy</name>
|
||||||
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>shouldGroupById</name>
|
||||||
|
<description>indicates whether the promotion operation should group objects in the graph by id or not</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -110,6 +114,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/otherresearchproduct</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForOtherResearchProductTable"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -161,6 +166,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/otherresearchproduct</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -24,6 +24,10 @@
|
||||||
<name>mergeAndGetStrategy</name>
|
<name>mergeAndGetStrategy</name>
|
||||||
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>shouldGroupById</name>
|
||||||
|
<description>indicates whether the promotion operation should group objects in the graph by id or not</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -103,7 +107,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2560
|
--conf spark.sql.shuffle.partitions=5000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg>
|
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/publication</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
@ -111,6 +115,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForPublicationTable"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -154,7 +159,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2560
|
--conf spark.sql.shuffle.partitions=5000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
<arg>--inputGraphTablePath</arg><arg>${workingDir}/publication</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
|
@ -162,6 +167,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/publication</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -99,7 +99,7 @@
|
||||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
--conf spark.sql.shuffle.partitions=2560
|
--conf spark.sql.shuffle.partitions=5000
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg>
|
<arg>--inputGraphTablePath</arg><arg>${inputGraphRootPath}/relation</arg>
|
||||||
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
<arg>--graphTableClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Relation</arg>
|
||||||
|
|
|
@ -24,6 +24,10 @@
|
||||||
<name>mergeAndGetStrategy</name>
|
<name>mergeAndGetStrategy</name>
|
||||||
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
<description>strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>shouldGroupById</name>
|
||||||
|
<description>indicates whether the promotion operation should group objects in the graph by id or not</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -110,6 +114,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
|
<arg>--outputGraphTablePath</arg><arg>${workingDir}/software</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
|
<ok to="DecisionPromoteResultActionPayloadForSoftwareTable"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -161,6 +166,7 @@
|
||||||
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
<arg>--actionPayloadClassName</arg><arg>eu.dnetlib.dhp.schema.oaf.Result</arg>
|
||||||
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
|
<arg>--outputGraphTablePath</arg><arg>${outputGraphRootPath}/software</arg>
|
||||||
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
<arg>--mergeAndGetStrategy</arg><arg>${mergeAndGetStrategy}</arg>
|
||||||
|
<arg>--shouldGroupById</arg><arg>${shouldGroupById}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
private ISClient isClient;
|
private ISClient isClient;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
||||||
// given
|
// given
|
||||||
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
||||||
Path outputDir = workingDir.resolve("output");
|
Path outputDir = workingDir.resolve("output");
|
||||||
|
|
|
@ -20,7 +20,7 @@ public class MergeAndGetTest {
|
||||||
class MergeFromAndGetStrategy {
|
class MergeFromAndGetStrategy {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndOaf() {
|
void shouldThrowForOafAndOaf() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -33,7 +33,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndRelation() {
|
void shouldThrowForOafAndRelation() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -46,7 +46,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndOafEntity() {
|
void shouldThrowForOafAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -59,7 +59,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOaf() {
|
void shouldThrowForRelationAndOaf() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -72,7 +72,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOafEntity() {
|
void shouldThrowForRelationAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -85,7 +85,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldBehaveProperlyForRelationAndRelation() {
|
void shouldBehaveProperlyForRelationAndRelation() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -101,7 +101,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOaf() {
|
void shouldThrowForOafEntityAndOaf() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -114,7 +114,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndRelation() {
|
void shouldThrowForOafEntityAndRelation() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -127,7 +127,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
||||||
// given
|
// given
|
||||||
class OafEntitySub1 extends OafEntity {
|
class OafEntitySub1 extends OafEntity {
|
||||||
}
|
}
|
||||||
|
@ -145,7 +145,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldBehaveProperlyForOafEntityAndOafEntity() {
|
void shouldBehaveProperlyForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -165,7 +165,7 @@ public class MergeAndGetTest {
|
||||||
class SelectNewerAndGetStrategy {
|
class SelectNewerAndGetStrategy {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndRelation() {
|
void shouldThrowForOafEntityAndRelation() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -178,7 +178,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOafEntity() {
|
void shouldThrowForRelationAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -191,7 +191,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndResult() {
|
void shouldThrowForOafEntityAndResult() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Result b = mock(Result.class);
|
Result b = mock(Result.class);
|
||||||
|
@ -204,7 +204,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
// real types must be used because subclass-superclass resolution does not work for
|
// real types must be used because subclass-superclass resolution does not work for
|
||||||
// mocks
|
// mocks
|
||||||
|
@ -221,7 +221,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldShouldReturnLeftForOafEntityAndOafEntity() {
|
void shouldShouldReturnLeftForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
when(a.getLastupdatetimestamp()).thenReturn(1L);
|
when(a.getLastupdatetimestamp()).thenReturn(1L);
|
||||||
|
@ -238,7 +238,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldShouldReturnRightForOafEntityAndOafEntity() {
|
void shouldShouldReturnRightForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
when(a.getLastupdatetimestamp()).thenReturn(2L);
|
when(a.getLastupdatetimestamp()).thenReturn(2L);
|
||||||
|
|
|
@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
class Main {
|
class Main {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
|
void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
|
||||||
// given
|
// given
|
||||||
Class<Relation> rowClazz = Relation.class;
|
Class<Relation> rowClazz = Relation.class;
|
||||||
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
||||||
|
@ -101,7 +101,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
"-outputGraphTablePath",
|
"-outputGraphTablePath",
|
||||||
"",
|
"",
|
||||||
"-mergeAndGetStrategy",
|
"-mergeAndGetStrategy",
|
||||||
MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name()
|
MergeAndGet.Strategy.SELECT_NEWER_AND_GET.name(),
|
||||||
|
"--shouldGroupById",
|
||||||
|
"true"
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// then
|
// then
|
||||||
|
@ -114,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
|
|
||||||
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
||||||
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
||||||
public void shouldPromoteActionPayloadForGraphTable(
|
void shouldPromoteActionPayloadForGraphTable(
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy strategy,
|
||||||
Class<? extends Oaf> rowClazz,
|
Class<? extends Oaf> rowClazz,
|
||||||
Class<? extends Oaf> actionPayloadClazz)
|
Class<? extends Oaf> actionPayloadClazz)
|
||||||
|
@ -141,7 +143,9 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
"-outputGraphTablePath",
|
"-outputGraphTablePath",
|
||||||
outputGraphTableDir.toString(),
|
outputGraphTableDir.toString(),
|
||||||
"-mergeAndGetStrategy",
|
"-mergeAndGetStrategy",
|
||||||
strategy.name()
|
strategy.name(),
|
||||||
|
"--shouldGroupById",
|
||||||
|
"true"
|
||||||
});
|
});
|
||||||
|
|
||||||
// then
|
// then
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
class JoinTableWithActionPayloadAndMerge {
|
class JoinTableWithActionPayloadAndMerge {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
||||||
// given
|
// given
|
||||||
class OafImpl extends Oaf {
|
class OafImpl extends Oaf {
|
||||||
}
|
}
|
||||||
|
@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
|
void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
|
||||||
// given
|
// given
|
||||||
String id0 = "id0";
|
String id0 = "id0";
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
|
@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
|
void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
|
||||||
// given
|
// given
|
||||||
String id0 = "id0";
|
String id0 = "id0";
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
|
@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
class GroupTableByIdAndMerge {
|
class GroupTableByIdAndMerge {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperly() {
|
void shouldRunProperly() {
|
||||||
// given
|
// given
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
String id2 = "id2";
|
String id2 = "id2";
|
||||||
|
|
|
@ -1,29 +1,27 @@
|
||||||
Description of the Module
|
Description of the Module
|
||||||
--------------------------
|
--------------------------
|
||||||
This module defines a **collector worker application** that runs on Hadoop.
|
This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records.
|
||||||
|
Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
|
||||||
|
the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping
|
||||||
|
of each MDStore.
|
||||||
|
|
||||||
It is responsible for harvesting metadata using different plugins.
|
## Metadata collection
|
||||||
|
|
||||||
The collector worker uses a message queue to inform the progress
|
The **metadata collection workflow** is responsible for harvesting metadata records from different protocols and responding to
|
||||||
of the harvesting action (using a message queue for sending **ONGOING** messages) furthermore,
|
different formats and to store them as on HDFS so that they can be further processed.
|
||||||
It gives, at the end of the job, some information about the status
|
|
||||||
of the collection i.e Number of records collected(using a message queue for sending **REPORT** messages).
|
|
||||||
|
|
||||||
To work the collection worker need some parameter like:
|
### Collector Plugins
|
||||||
|
|
||||||
* **hdfsPath**: the path where storing the sequential file
|
Different protocols are managed by dedicated Collector plugins, i.e. java programs implementing a defined interface:
|
||||||
* **apidescriptor**: the JSON encoding of the API Descriptor
|
|
||||||
* **namenode**: the Name Node URI
|
|
||||||
* **userHDFS**: the user wich create the hdfs seq file
|
|
||||||
* **rabbitUser**: the user to connect with RabbitMq for messaging
|
|
||||||
* **rabbitPassWord**: the password to connect with RabbitMq for messaging
|
|
||||||
* **rabbitHost**: the host of the RabbitMq server
|
|
||||||
* **rabbitOngoingQueue**: the name of the ongoing queue
|
|
||||||
* **rabbitReportQueue**: the name of the report queue
|
|
||||||
* **workflowId**: the identifier of the dnet Workflow
|
|
||||||
|
|
||||||
##Plugins
|
```eu.dnetlib.dhp.collection.plugin.CollectorPlugin```
|
||||||
* OAI Plugin
|
|
||||||
|
|
||||||
## Usage
|
The list of the supported plugins:
|
||||||
|
|
||||||
|
* OAI Plugin: collects from OAI-PMH compatible endpoints
|
||||||
|
* MDStore plugin: collects from a given D-Net MetadataStore, (identified by moogodb URI, dbName, MDStoreID)
|
||||||
|
* MDStore dump plugin: collects from an MDStore dump stored on the HDFS location indicated by the `path` parameter
|
||||||
|
|
||||||
|
# Transformation Plugins
|
||||||
TODO
|
TODO
|
||||||
|
|
||||||
|
|
|
@ -7,13 +7,20 @@
|
||||||
<version>1.2.4-SNAPSHOT</version>
|
<version>1.2.4-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>dhp-aggregation</artifactId>
|
<artifactId>dhp-aggregation</artifactId>
|
||||||
|
<<<<<<< HEAD
|
||||||
|
|
||||||
|
=======
|
||||||
|
>>>>>>> upstream/master
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>net.alchim31.maven</groupId>
|
<groupId>net.alchim31.maven</groupId>
|
||||||
<artifactId>scala-maven-plugin</artifactId>
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<<<<<<< HEAD
|
||||||
<version>4.0.1</version>
|
<version>4.0.1</version>
|
||||||
|
=======
|
||||||
|
<version>${net.alchim31.maven.version}</version>
|
||||||
|
>>>>>>> upstream/master
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>scala-compile-first</id>
|
<id>scala-compile-first</id>
|
||||||
|
@ -30,6 +37,16 @@
|
||||||
<goal>testCompile</goal>
|
<goal>testCompile</goal>
|
||||||
</goals>
|
</goals>
|
||||||
</execution>
|
</execution>
|
||||||
|
<<<<<<< HEAD
|
||||||
|
=======
|
||||||
|
<execution>
|
||||||
|
<id>scala-doc</id>
|
||||||
|
<phase>process-resources</phase> <!-- or wherever -->
|
||||||
|
<goals>
|
||||||
|
<goal>doc</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
>>>>>>> upstream/master
|
||||||
</executions>
|
</executions>
|
||||||
<configuration>
|
<configuration>
|
||||||
<scalaVersion>${scala.version}</scalaVersion>
|
<scalaVersion>${scala.version}</scalaVersion>
|
||||||
|
@ -38,10 +55,19 @@
|
||||||
</plugins>
|
</plugins>
|
||||||
|
|
||||||
</build>
|
</build>
|
||||||
|
<<<<<<< HEAD
|
||||||
|
|
||||||
|
|
||||||
|
=======
|
||||||
|
|
||||||
|
>>>>>>> upstream/master
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>httpclient</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.spark</groupId>
|
<groupId>org.apache.spark</groupId>
|
||||||
<artifactId>spark-core_2.11</artifactId>
|
<artifactId>spark-core_2.11</artifactId>
|
||||||
|
@ -55,6 +81,7 @@
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-common</artifactId>
|
<artifactId>dhp-common</artifactId>
|
||||||
<version>${project.version}</version>
|
<version>${project.version}</version>
|
||||||
|
<<<<<<< HEAD
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -70,6 +97,10 @@
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
=======
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
>>>>>>> upstream/master
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>net.sf.saxon</groupId>
|
<groupId>net.sf.saxon</groupId>
|
||||||
<artifactId>Saxon-HE</artifactId>
|
<artifactId>Saxon-HE</artifactId>
|
||||||
|
@ -89,14 +120,11 @@
|
||||||
<artifactId>jaxen</artifactId>
|
<artifactId>jaxen</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.commons</groupId>
|
<groupId>org.json</groupId>
|
||||||
<artifactId>commons-csv</artifactId>
|
<artifactId>json</artifactId>
|
||||||
<version>1.8</version>
|
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.poi</groupId>
|
<groupId>org.apache.poi</groupId>
|
||||||
|
@ -109,7 +137,10 @@
|
||||||
<artifactId>commons-compress</artifactId>
|
<artifactId>commons-compress</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.mongodb</groupId>
|
||||||
|
<artifactId>mongo-java-driver</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
public class CollectAndSave implements Serializable {
|
public class CollectAndSave implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
|
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
CollectAndSave.class
|
Objects
|
||||||
.getResourceAsStream(
|
.requireNonNull(
|
||||||
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"));
|
CollectAndSave.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
@ -75,7 +77,6 @@ public class CollectAndSave implements Serializable {
|
||||||
.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
|
.union(sc.sequenceFile(inputPath + "/otherresearchproduct", Text.class, Text.class))
|
||||||
.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
|
.union(sc.sequenceFile(inputPath + "/software", Text.class, Text.class))
|
||||||
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class);
|
||||||
;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void removeOutputDir(SparkSession spark, String path) {
|
private static void removeOutputDir(SparkSession spark, String path) {
|
||||||
|
|
|
@ -36,7 +36,7 @@ import scala.Tuple2;
|
||||||
*/
|
*/
|
||||||
public class SparkAtomicActionScoreJob implements Serializable {
|
public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
|
|
||||||
private static String DOI = "doi";
|
private static final String DOI = "doi";
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionScoreJob.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
|
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
|
||||||
String bipScorePath, Class<I> inputClazz) {
|
String bipScorePath, Class<I> inputClazz) {
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
||||||
.textFile(bipScorePath)
|
.textFile(bipScorePath)
|
||||||
|
@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
return bs;
|
return bs;
|
||||||
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
|
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
|
||||||
|
|
||||||
System.out.println(bipScores.count());
|
|
||||||
|
|
||||||
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
||||||
|
|
||||||
results.createOrReplaceTempView("result");
|
results.createOrReplaceTempView("result");
|
||||||
|
@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
ret.setId(value._2().getId());
|
ret.setId(value._2().getId());
|
||||||
return ret;
|
return ret;
|
||||||
}, Encoders.bean(BipScore.class))
|
}, Encoders.bean(BipScore.class))
|
||||||
.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING())
|
.groupByKey((MapFunction<BipScore, String>) BipScore::getId, Encoders.STRING())
|
||||||
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
|
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
|
||||||
Result ret = new Result();
|
Result ret = new Result();
|
||||||
ret.setDataInfo(getDataInfo());
|
ret.setDataInfo(getDataInfo());
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
import org.apache.spark.sql.Dataset;
|
||||||
|
import org.apache.spark.sql.Encoders;
|
||||||
|
import org.apache.spark.sql.SparkSession;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
|
||||||
|
public class Constants {
|
||||||
|
|
||||||
|
public static final String DOI = "doi";
|
||||||
|
|
||||||
|
public static final String UPDATE_DATA_INFO_TYPE = "update";
|
||||||
|
public static final String UPDATE_SUBJECT_FOS_CLASS_ID = "subject:fos";
|
||||||
|
public static final String UPDATE_CLASS_NAME = "Inferred by OpenAIRE";
|
||||||
|
public static final String UPDATE_MEASURE_BIP_CLASS_ID = "measure:bip";
|
||||||
|
|
||||||
|
public static final String FOS_CLASS_ID = "FOS";
|
||||||
|
public static final String FOS_CLASS_NAME = "Fields of Science and Technology classification";
|
||||||
|
|
||||||
|
public static final String NULL = "NULL";
|
||||||
|
|
||||||
|
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
private Constants() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Boolean isSparkSessionManaged(ArgumentApplicationParser parser) {
|
||||||
|
return Optional
|
||||||
|
.ofNullable(parser.get("isSparkSessionManaged"))
|
||||||
|
.map(Boolean::valueOf)
|
||||||
|
.orElse(Boolean.TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static <R> Dataset<R> readPath(
|
||||||
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
|
return spark
|
||||||
|
.read()
|
||||||
|
.textFile(inputPath)
|
||||||
|
.map((MapFunction<String, R>) value -> OBJECT_MAPPER.readValue(value, clazz), Encoders.bean(clazz));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.createunresolvedentities;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||||
|
|
||||||
|
public class GetFOSData implements Serializable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(GetFOSData.class);
|
||||||
|
|
||||||
|
public static final char DEFAULT_DELIMITER = '\t';
|
||||||
|
|
||||||
|
public static void main(final String[] args) throws Exception {
|
||||||
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
IOUtils
|
||||||
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
|
GetFOSData.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/actionmanager/createunresolvedentities/get_fos_parameters.json"))));
|
||||||
|
|
||||||
|
parser.parseArgument(args);
|
||||||
|
|
||||||
|
// the path where the original fos csv file is stored
|
||||||
|
final String sourcePath = parser.get("sourcePath");
|
||||||
|
log.info("sourcePath {}", sourcePath);
|
||||||
|
|
||||||
|
// the path where to put the file as json
|
||||||
|
final String outputPath = parser.get("outputPath");
|
||||||
|
log.info("outputPath {}", outputPath);
|
||||||
|
|
||||||
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
|
log.info("hdfsNameNode {}", hdfsNameNode);
|
||||||
|
|
||||||
|
final String classForName = parser.get("classForName");
|
||||||
|
log.info("classForName {}", classForName);
|
||||||
|
|
||||||
|
final char delimiter = Optional
|
||||||
|
.ofNullable(parser.get("delimiter"))
|
||||||
|
.map(s -> s.charAt(0))
|
||||||
|
.orElse(DEFAULT_DELIMITER);
|
||||||
|
log.info("delimiter {}", delimiter);
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
|
|
||||||
|
new GetFOSData().doRewrite(sourcePath, outputPath, classForName, delimiter, fileSystem);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doRewrite(String inputPath, String outputFile, String classForName, char delimiter, FileSystem fs)
|
||||||
|
throws IOException, ClassNotFoundException {
|
||||||
|
|
||||||
|
// reads the csv and writes it as its json equivalent
|
||||||
|
try (InputStreamReader reader = new InputStreamReader(fs.open(new Path(inputPath)))) {
|
||||||
|
GetCSV.getCsv(fs, reader, outputFile, classForName, delimiter);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue