forked from D-Net/dnet-hadoop
merge with branch beta
This commit is contained in:
commit
c8321ad31a
|
@ -8,8 +8,6 @@ import java.util.List;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.maven.plugin.AbstractMojo;
|
import org.apache.maven.plugin.AbstractMojo;
|
||||||
import org.apache.maven.plugin.MojoExecutionException;
|
|
||||||
import org.apache.maven.plugin.MojoFailureException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates oozie properties which were not provided from commandline.
|
* Generates oozie properties which were not provided from commandline.
|
||||||
|
@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
};
|
};
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void execute() throws MojoExecutionException, MojoFailureException {
|
public void execute() {
|
||||||
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
|
||||||
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
|
||||||
String generatedSandboxName = generateSandboxName(
|
String generatedSandboxName = generateSandboxName(
|
||||||
|
@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
|
||||||
/**
|
/**
|
||||||
* Generates sandbox name from workflow source directory.
|
* Generates sandbox name from workflow source directory.
|
||||||
*
|
*
|
||||||
* @param wfSourceDir
|
* @param wfSourceDir workflow source directory
|
||||||
* @return generated sandbox name
|
* @return generated sandbox name
|
||||||
*/
|
*/
|
||||||
private String generateSandboxName(String wfSourceDir) {
|
private String generateSandboxName(String wfSourceDir) {
|
||||||
// utilize all dir names until finding one of the limiters
|
// utilize all dir names until finding one of the limiters
|
||||||
List<String> sandboxNameParts = new ArrayList<String>();
|
List<String> sandboxNameParts = new ArrayList<>();
|
||||||
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
|
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
|
||||||
ArrayUtils.reverse(tokens);
|
ArrayUtils.reverse(tokens);
|
||||||
if (tokens.length > 0) {
|
if (tokens.length > 0) {
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
for (String limiter : limiters) {
|
for (String limiter : limiters) {
|
||||||
if (limiter.equals(token)) {
|
if (limiter.equals(token)) {
|
||||||
return sandboxNameParts.size() > 0
|
return !sandboxNameParts.isEmpty()
|
||||||
? StringUtils.join(sandboxNameParts.toArray())
|
? StringUtils.join(sandboxNameParts.toArray())
|
||||||
: null;
|
: null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sandboxNameParts.size() > 0) {
|
if (!sandboxNameParts.isEmpty()) {
|
||||||
sandboxNameParts.add(0, File.separator);
|
sandboxNameParts.add(0, File.separator);
|
||||||
}
|
}
|
||||||
sandboxNameParts.add(0, token);
|
sandboxNameParts.add(0, token);
|
||||||
|
|
|
@ -16,6 +16,7 @@ import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
*/
|
*/
|
||||||
protected List<String> getEscapeChars(String escapeChars) {
|
protected List<String> getEscapeChars(String escapeChars) {
|
||||||
List<String> tokens = getListFromCSV(escapeChars);
|
List<String> tokens = getListFromCSV(escapeChars);
|
||||||
List<String> realTokens = new ArrayList<String>();
|
List<String> realTokens = new ArrayList<>();
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
String realToken = getRealToken(token);
|
String realToken = getRealToken(token);
|
||||||
realTokens.add(realToken);
|
realTokens.add(realToken);
|
||||||
|
@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
* @return content
|
* @return content
|
||||||
*/
|
*/
|
||||||
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
|
protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
|
||||||
List<String> names = new ArrayList<String>(properties.stringPropertyNames());
|
List<String> names = new ArrayList<>(properties.stringPropertyNames());
|
||||||
Collections.sort(names);
|
Collections.sort(names);
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
if (!StringUtils.isBlank(comment)) {
|
if (!StringUtils.isBlank(comment)) {
|
||||||
|
@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
throws MojoExecutionException {
|
throws MojoExecutionException {
|
||||||
try {
|
try {
|
||||||
String content = getContent(comment, properties, escapeTokens);
|
String content = getContent(comment, properties, escapeTokens);
|
||||||
FileUtils.writeStringToFile(file, content, ENCODING_UTF8);
|
FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new MojoExecutionException("Error creating properties file", e);
|
throw new MojoExecutionException("Error creating properties file", e);
|
||||||
}
|
}
|
||||||
|
@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
|
||||||
*/
|
*/
|
||||||
protected static final List<String> getListFromCSV(String csv) {
|
protected static final List<String> getListFromCSV(String csv) {
|
||||||
if (StringUtils.isBlank(csv)) {
|
if (StringUtils.isBlank(csv)) {
|
||||||
return new ArrayList<String>();
|
return new ArrayList<>();
|
||||||
}
|
}
|
||||||
List<String> list = new ArrayList<String>();
|
List<String> list = new ArrayList<>();
|
||||||
String[] tokens = StringUtils.split(csv, ",");
|
String[] tokens = StringUtils.split(csv, ",");
|
||||||
for (String token : tokens) {
|
for (String token : tokens) {
|
||||||
list.add(token.trim());
|
list.add(token.trim());
|
||||||
|
|
|
@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
/** @author mhorst, claudio.atzori */
|
/** @author mhorst, claudio.atzori */
|
||||||
public class GenerateOoziePropertiesMojoTest {
|
class GenerateOoziePropertiesMojoTest {
|
||||||
|
|
||||||
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
|
||||||
|
|
||||||
@BeforeEach
|
@BeforeEach
|
||||||
public void clearSystemProperties() {
|
void clearSystemProperties() {
|
||||||
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
|
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
|
||||||
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
|
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmpty() throws Exception {
|
void testExecuteEmpty() throws Exception {
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteSandboxNameAlreadySet() throws Exception {
|
void testExecuteSandboxNameAlreadySet() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
||||||
String sandboxName = "originalSandboxName";
|
String sandboxName = "originalSandboxName";
|
||||||
|
@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmptyWorkflowSourceDir() throws Exception {
|
void testExecuteEmptyWorkflowSourceDir() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "";
|
String workflowSourceDir = "";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteNullSandboxNameGenerated() throws Exception {
|
void testExecuteNullSandboxNameGenerated() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/";
|
String workflowSourceDir = "eu/dnetlib/dhp/";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecute() throws Exception {
|
void testExecute() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithoutRoot() throws Exception {
|
void testExecuteWithoutRoot() throws Exception {
|
||||||
// given
|
// given
|
||||||
String workflowSourceDir = "wf/transformers";
|
String workflowSourceDir = "wf/transformers";
|
||||||
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
|
||||||
|
|
|
@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
|
||||||
/** @author mhorst, claudio.atzori */
|
/** @author mhorst, claudio.atzori */
|
||||||
@ExtendWith(MockitoExtension.class)
|
@ExtendWith(MockitoExtension.class)
|
||||||
public class WritePredefinedProjectPropertiesTest {
|
class WritePredefinedProjectPropertiesTest {
|
||||||
|
|
||||||
@Mock
|
@Mock
|
||||||
private MavenProject mavenProject;
|
private MavenProject mavenProject;
|
||||||
|
@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
// ----------------------------------- TESTS ---------------------------------------------
|
// ----------------------------------- TESTS ---------------------------------------------
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteEmpty() throws Exception {
|
void testExecuteEmpty() throws Exception {
|
||||||
// execute
|
// execute
|
||||||
mojo.execute();
|
mojo.execute();
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectProperties() throws Exception {
|
void testExecuteWithProjectProperties() throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test()
|
@Test()
|
||||||
public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
|
void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromBlankLocation() {
|
void testExecuteIncludingPropertyKeysFromBlankLocation() {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
String value = "projectPropertyValue";
|
String value = "projectPropertyValue";
|
||||||
|
@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "projectPropertyKey";
|
String key = "projectPropertyKey";
|
||||||
|
@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
|
void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
mojo.setQuiet(true);
|
mojo.setQuiet(true);
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
||||||
|
@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteIncludingPropertyKeysFromInvalidFile() {
|
void testExecuteIncludingPropertyKeysFromInvalidFile() {
|
||||||
// given
|
// given
|
||||||
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
mojo.setIncludePropertyKeysFromFiles(new String[] {
|
||||||
"invalid location"
|
"invalid location"
|
||||||
|
@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
|
void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
mojo.setIncludeEnvironmentVariables(true);
|
mojo.setIncludeEnvironmentVariables(true);
|
||||||
|
|
||||||
|
@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
|
void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey";
|
String key = "systemPropertyKey";
|
||||||
String value = "systemPropertyValue";
|
String value = "systemPropertyValue";
|
||||||
|
@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
String key = "systemPropertyKey ";
|
String key = "systemPropertyKey ";
|
||||||
|
|
|
@ -117,6 +117,11 @@
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-schemas</artifactId>
|
<artifactId>dhp-schemas</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.opencsv</groupId>
|
||||||
|
<artifactId>opencsv</artifactId>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.application;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
|
|
||||||
import com.google.common.collect.Maps;
|
|
||||||
|
|
||||||
public class ApplicationUtils {
|
|
||||||
|
|
||||||
}
|
|
|
@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
|
||||||
final StringWriter stringWriter = new StringWriter();
|
final StringWriter stringWriter = new StringWriter();
|
||||||
IOUtils.copy(gis, stringWriter);
|
IOUtils.copy(gis, stringWriter);
|
||||||
return stringWriter.toString();
|
return stringWriter.toString();
|
||||||
} catch (Throwable e) {
|
} catch (IOException e) {
|
||||||
log.error("Wrong value to decompress:" + abstractCompressed);
|
log.error("Wrong value to decompress: {}", abstractCompressed);
|
||||||
throw new RuntimeException(e);
|
throw new IllegalArgumentException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressArgument(final String value) throws Exception {
|
public static String compressArgument(final String value) throws IOException {
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(out);
|
GZIPOutputStream gzip = new GZIPOutputStream(out);
|
||||||
gzip.write(value.getBytes());
|
gzip.write(value.getBytes());
|
||||||
|
|
|
@ -9,9 +9,6 @@ public class OptionsParameter {
|
||||||
private boolean paramRequired;
|
private boolean paramRequired;
|
||||||
private boolean compressed;
|
private boolean compressed;
|
||||||
|
|
||||||
public OptionsParameter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParamName() {
|
public String getParamName() {
|
||||||
return paramName;
|
return paramName;
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ public class ApiDescriptor {
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setParams(final HashMap<String, String> params) {
|
public void setParams(final Map<String, String> params) {
|
||||||
this.params = params;
|
this.params = params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ public class Constants {
|
||||||
|
|
||||||
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
|
||||||
|
|
||||||
|
private Constants() {
|
||||||
|
}
|
||||||
|
|
||||||
static {
|
static {
|
||||||
accessRightsCoarMap.put("OPEN", "c_abf2");
|
accessRightsCoarMap.put("OPEN", "c_abf2");
|
||||||
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
accessRightsCoarMap.put("RESTRICTED", "c_16ec");
|
||||||
|
@ -49,4 +52,10 @@ public class Constants {
|
||||||
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
|
||||||
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
|
||||||
|
|
||||||
|
// IETF Draft and used by Repositories like ZENODO , not included in APACHE HTTP java packages
|
||||||
|
// see https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
|
||||||
|
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
.setDocumentationUrl(
|
.setDocumentationUrl(
|
||||||
value
|
value
|
||||||
.stream()
|
.stream()
|
||||||
.map(v -> v.getValue())
|
.map(Field::getValue)
|
||||||
.collect(Collectors.toList())));
|
.collect(Collectors.toList())));
|
||||||
|
|
||||||
Optional
|
Optional
|
||||||
|
@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable {
|
||||||
.setContactgroup(
|
.setContactgroup(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(ir.getContactgroup())
|
.ofNullable(ir.getContactgroup())
|
||||||
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList()))
|
.map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
|
|
||||||
out
|
out
|
||||||
.setContactperson(
|
.setContactperson(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(ir.getContactperson())
|
.ofNullable(ir.getContactperson())
|
||||||
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList()))
|
.map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
out
|
out
|
||||||
.setTool(
|
.setTool(
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(ir.getTool())
|
.ofNullable(ir.getTool())
|
||||||
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList()))
|
.map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
|
||||||
.orElse(null));
|
.orElse(null));
|
||||||
|
|
||||||
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
|
||||||
|
@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable {
|
||||||
|
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(input.getAuthor())
|
.ofNullable(input.getAuthor())
|
||||||
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList())));
|
.ifPresent(
|
||||||
|
ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList())));
|
||||||
|
|
||||||
// I do not map Access Right UNKNOWN or OTHER
|
// I do not map Access Right UNKNOWN or OTHER
|
||||||
|
|
||||||
|
@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
if (oInst.isPresent()) {
|
if (oInst.isPresent()) {
|
||||||
out
|
out
|
||||||
.setInstance(
|
.setInstance(
|
||||||
oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList()));
|
oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList()));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (iTitle.size() > 0) {
|
if (!iTitle.isEmpty()) {
|
||||||
out.setMaintitle(iTitle.get(0).getValue());
|
out.setMaintitle(iTitle.get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (iTitle.size() > 0) {
|
if (!iTitle.isEmpty()) {
|
||||||
out.setSubtitle(iTitle.get(0).getValue());
|
out.setSubtitle(iTitle.get(0).getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable {
|
||||||
|
|
||||||
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
|
||||||
Path hdfsWritePath = new Path(outputPath);
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
|
||||||
|
|
||||||
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
Path hdfsWritePath = new Path(outputPath);
|
Path hdfsWritePath = new Path(outputPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, true);
|
fileSystem.delete(hdfsWritePath, true);
|
||||||
|
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
|
||||||
|
fileSystem.create(hdfsWritePath).getWrappedStream())) {
|
||||||
|
|
||||||
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
|
RemoteIterator<LocatedFileStatus> iterator = fileSystem
|
||||||
|
|
||||||
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
|
|
||||||
.listFiles(
|
.listFiles(
|
||||||
new Path(inputPath), true);
|
new Path(inputPath), true);
|
||||||
|
|
||||||
while (fileStatusListIterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0);
|
writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
ar.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
|
||||||
|
|
|
@ -10,8 +10,6 @@ import java.util.Optional;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.bson.Document;
|
import org.bson.Document;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject;
|
||||||
import com.mongodb.MongoClient;
|
import com.mongodb.MongoClient;
|
||||||
import com.mongodb.MongoClientURI;
|
import com.mongodb.MongoClientURI;
|
||||||
import com.mongodb.QueryBuilder;
|
import com.mongodb.QueryBuilder;
|
||||||
|
import com.mongodb.client.FindIterable;
|
||||||
import com.mongodb.client.MongoCollection;
|
import com.mongodb.client.MongoCollection;
|
||||||
import com.mongodb.client.MongoDatabase;
|
import com.mongodb.client.MongoDatabase;
|
||||||
|
|
||||||
|
@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable {
|
||||||
|
|
||||||
final String currentId = Optional
|
final String currentId = Optional
|
||||||
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
||||||
.map(r -> r.first())
|
.map(FindIterable::first)
|
||||||
.map(d -> d.getString("currentId"))
|
.map(d -> d.getString("currentId"))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
||||||
|
|
||||||
|
@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable {
|
||||||
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
|
||||||
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
|
||||||
log.warn(err);
|
log.warn(err);
|
||||||
throw new RuntimeException(err);
|
throw new IllegalArgumentException(err);
|
||||||
}
|
}
|
||||||
return client.getDatabase(dbName);
|
return client.getDatabase(dbName);
|
||||||
}
|
}
|
||||||
|
@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable {
|
||||||
String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
|
||||||
log.warn(err);
|
log.warn(err);
|
||||||
if (abortIfMissing) {
|
if (abortIfMissing) {
|
||||||
throw new RuntimeException(err);
|
throw new IllegalArgumentException(err);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
|
||||||
*/
|
*/
|
||||||
public class PacePerson {
|
public class PacePerson {
|
||||||
|
|
||||||
private static final String UTF8 = "UTF-8";
|
|
||||||
private List<String> name = Lists.newArrayList();
|
private List<String> name = Lists.newArrayList();
|
||||||
private List<String> surname = Lists.newArrayList();
|
private List<String> surname = Lists.newArrayList();
|
||||||
private List<String> fullname = Lists.newArrayList();
|
private List<String> fullname = Lists.newArrayList();
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.aggregation.common;
|
package eu.dnetlib.dhp.common.aggregation;
|
||||||
|
|
||||||
import java.io.Closeable;
|
import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -11,8 +11,6 @@ import java.util.Objects;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.message.MessageSender;
|
import eu.dnetlib.dhp.message.MessageSender;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
|
@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap<String, String> implements C
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
|
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
|
||||||
|
|
||||||
private MessageSender messageSender;
|
private transient MessageSender messageSender;
|
||||||
|
|
||||||
public AggregatorReport() {
|
public AggregatorReport() {
|
||||||
}
|
}
|
||||||
|
|
||||||
public AggregatorReport(MessageSender messageSender) throws IOException {
|
public AggregatorReport(MessageSender messageSender) {
|
||||||
this.messageSender = messageSender;
|
this.messageSender = messageSender;
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,9 @@ import java.io.*;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.http.HttpHeaders;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
|
||||||
|
@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
this.deposition_id = deposition_id;
|
this.deposition_id = deposition_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ZenodoAPIClient(String urlString, String access_token) throws IOException {
|
public ZenodoAPIClient(String urlString, String access_token) {
|
||||||
|
|
||||||
this.urlString = urlString;
|
this.urlString = urlString;
|
||||||
this.access_token = access_token;
|
this.access_token = access_token;
|
||||||
|
@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString)
|
.url(urlString)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.post(body)
|
.post(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(bucket + "/" + file_name)
|
.url(bucket + "/" + file_name)
|
||||||
.addHeader("Content-Type", "application/zip") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString + "/" + deposition_id)
|
.url(urlString + "/" + deposition_id)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.put(body)
|
.put(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
.url(urlString + "/" + deposition_id + "/actions/newversion")
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.post(body)
|
.post(body)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(urlString)
|
.url(urlString)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.get()
|
.get()
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable {
|
||||||
|
|
||||||
Request request = new Request.Builder()
|
Request request = new Request.Builder()
|
||||||
.url(url)
|
.url(url)
|
||||||
.addHeader("Content-Type", "application/json") // add request headers
|
.addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
|
||||||
.addHeader("Authorization", "Bearer " + access_token)
|
.addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
|
||||||
.get()
|
.get()
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
|
|
@ -32,13 +32,13 @@ public class Creator {
|
||||||
|
|
||||||
public static Creator newInstance(String name, String affiliation, String orcid) {
|
public static Creator newInstance(String name, String affiliation, String orcid) {
|
||||||
Creator c = new Creator();
|
Creator c = new Creator();
|
||||||
if (!(name == null)) {
|
if (name != null) {
|
||||||
c.name = name;
|
c.name = name;
|
||||||
}
|
}
|
||||||
if (!(affiliation == null)) {
|
if (affiliation != null) {
|
||||||
c.affiliation = affiliation;
|
c.affiliation = affiliation;
|
||||||
}
|
}
|
||||||
if (!(orcid == null)) {
|
if (orcid != null) {
|
||||||
c.orcid = orcid;
|
c.orcid = orcid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
import net.minidev.json.annotate.JsonIgnore;
|
|
||||||
|
|
||||||
public class File implements Serializable {
|
public class File implements Serializable {
|
||||||
private String checksum;
|
private String checksum;
|
||||||
private String filename;
|
private String filename;
|
||||||
private long filesize;
|
private long filesize;
|
||||||
private String id;
|
private String id;
|
||||||
|
|
||||||
@JsonIgnore
|
|
||||||
// private Links links;
|
|
||||||
|
|
||||||
public String getChecksum() {
|
public String getChecksum() {
|
||||||
return checksum;
|
return checksum;
|
||||||
}
|
}
|
||||||
|
@ -46,13 +41,4 @@ public class File implements Serializable {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @JsonIgnore
|
|
||||||
// public Links getLinks() {
|
|
||||||
// return links;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// @JsonIgnore
|
|
||||||
// public void setLinks(Links links) {
|
|
||||||
// this.links = links;
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
public class CollectorException extends Exception {
|
public class CollectorException extends Exception {
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.opencsv.bean.CsvToBeanBuilder;
|
||||||
|
|
||||||
|
public class GetCSV {
|
||||||
|
|
||||||
|
public static final char DEFAULT_DELIMITER = ',';
|
||||||
|
|
||||||
|
private GetCSV() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
|
||||||
|
String modelClass) throws IOException, ClassNotFoundException {
|
||||||
|
getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath,
|
||||||
|
String modelClass, char delimiter) throws IOException, ClassNotFoundException {
|
||||||
|
|
||||||
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
|
FSDataOutputStream fsDataOutputStream = null;
|
||||||
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
|
}
|
||||||
|
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
|
try (BufferedWriter writer = new BufferedWriter(
|
||||||
|
new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8))) {
|
||||||
|
|
||||||
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
final List lines = new CsvToBeanBuilder(reader)
|
||||||
|
.withType(Class.forName(modelClass))
|
||||||
|
.withSeparator(delimiter)
|
||||||
|
.build()
|
||||||
|
.parse();
|
||||||
|
|
||||||
|
for (Object line : lines) {
|
||||||
|
writer.write(mapper.writeValueAsString(line));
|
||||||
|
writer.newLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bundles the http connection parameters driving the client behaviour.
|
* Bundles the http connection parameters driving the client behaviour.
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.collection;
|
package eu.dnetlib.dhp.common.collection;
|
||||||
|
|
||||||
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||||
|
|
||||||
|
@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
import eu.dnetlib.dhp.common.Constants;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
|
||||||
*
|
*
|
||||||
* @author jochen, michele, andrea, alessia, claudio
|
* @author jochen, michele, andrea, alessia, claudio, andreas
|
||||||
*/
|
*/
|
||||||
public class HttpConnector2 {
|
public class HttpConnector2 {
|
||||||
|
|
||||||
|
@ -32,7 +33,7 @@ public class HttpConnector2 {
|
||||||
|
|
||||||
private String responseType = null;
|
private String responseType = null;
|
||||||
|
|
||||||
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
|
||||||
|
|
||||||
public HttpConnector2() {
|
public HttpConnector2() {
|
||||||
this(new HttpClientParams());
|
this(new HttpClientParams());
|
||||||
|
@ -112,6 +113,17 @@ public class HttpConnector2 {
|
||||||
}
|
}
|
||||||
|
|
||||||
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
|
||||||
|
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
|
||||||
|
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
|
||||||
|
|
||||||
|
if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) {
|
||||||
|
if (retryAfter > 0) {
|
||||||
|
backoffAndSleep(retryAfter);
|
||||||
|
} else {
|
||||||
|
backoffAndSleep(1000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (is2xx(urlConn.getResponseCode())) {
|
if (is2xx(urlConn.getResponseCode())) {
|
||||||
input = urlConn.getInputStream();
|
input = urlConn.getInputStream();
|
||||||
responseType = urlConn.getContentType();
|
responseType = urlConn.getContentType();
|
||||||
|
@ -120,7 +132,7 @@ public class HttpConnector2 {
|
||||||
if (is3xx(urlConn.getResponseCode())) {
|
if (is3xx(urlConn.getResponseCode())) {
|
||||||
// REDIRECTS
|
// REDIRECTS
|
||||||
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
|
||||||
log.info(String.format("The requested url has been moved to %s", newUrl));
|
log.info("The requested url has been moved to {}", newUrl);
|
||||||
report
|
report
|
||||||
.put(
|
.put(
|
||||||
REPORT_PREFIX + urlConn.getResponseCode(),
|
REPORT_PREFIX + urlConn.getResponseCode(),
|
||||||
|
@ -140,14 +152,14 @@ public class HttpConnector2 {
|
||||||
if (retryAfter > 0) {
|
if (retryAfter > 0) {
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
requestUrl + " - waiting and repeating request after suggested retry-after "
|
"{} - waiting and repeating request after suggested retry-after {} sec.",
|
||||||
+ retryAfter + " sec.");
|
requestUrl, retryAfter);
|
||||||
backoffAndSleep(retryAfter * 1000);
|
backoffAndSleep(retryAfter * 1000);
|
||||||
} else {
|
} else {
|
||||||
log
|
log
|
||||||
.warn(
|
.warn(
|
||||||
requestUrl + " - waiting and repeating request after default delay of "
|
"{} - waiting and repeating request after default delay of {} sec.",
|
||||||
+ getClientParams().getRetryDelay() + " sec.");
|
requestUrl, getClientParams().getRetryDelay());
|
||||||
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
|
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
|
||||||
}
|
}
|
||||||
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
|
||||||
|
@ -181,12 +193,12 @@ public class HttpConnector2 {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
|
||||||
log.debug("StatusCode: " + urlConn.getResponseMessage());
|
log.debug("StatusCode: {}", urlConn.getResponseMessage());
|
||||||
|
|
||||||
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
|
||||||
if (e.getKey() != null) {
|
if (e.getKey() != null) {
|
||||||
for (String v : e.getValue()) {
|
for (String v : e.getValue()) {
|
||||||
log.debug(" key: " + e.getKey() + " - value: " + v);
|
log.debug(" key: {} - value: {}", e.getKey(), v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -204,7 +216,7 @@ public class HttpConnector2 {
|
||||||
|
|
||||||
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
|
||||||
for (String key : headerMap.keySet()) {
|
for (String key : headerMap.keySet()) {
|
||||||
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0)
|
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
|
||||||
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) {
|
||||||
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
return Integer.parseInt(headerMap.get(key).get(0)) + 10;
|
||||||
}
|
}
|
|
@ -1,11 +1,11 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.common.rest;
|
package eu.dnetlib.dhp.common.rest;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
|
||||||
import org.apache.http.client.methods.HttpGet;
|
import org.apache.http.client.methods.HttpGet;
|
||||||
import org.apache.http.client.methods.HttpPost;
|
import org.apache.http.client.methods.HttpPost;
|
||||||
import org.apache.http.client.methods.HttpUriRequest;
|
import org.apache.http.client.methods.HttpUriRequest;
|
||||||
|
@ -23,17 +23,20 @@ public class DNetRestClient {
|
||||||
|
|
||||||
private static final ObjectMapper mapper = new ObjectMapper();
|
private static final ObjectMapper mapper = new ObjectMapper();
|
||||||
|
|
||||||
|
private DNetRestClient() {
|
||||||
|
}
|
||||||
|
|
||||||
public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
|
public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
|
||||||
final HttpGet httpGet = new HttpGet(url);
|
final HttpGet httpGet = new HttpGet(url);
|
||||||
return doHTTPRequest(httpGet, clazz);
|
return doHTTPRequest(httpGet, clazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String doGET(final String url) throws Exception {
|
public static String doGET(final String url) throws IOException {
|
||||||
final HttpGet httpGet = new HttpGet(url);
|
final HttpGet httpGet = new HttpGet(url);
|
||||||
return doHTTPRequest(httpGet);
|
return doHTTPRequest(httpGet);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <V> String doPOST(final String url, V objParam) throws Exception {
|
public static <V> String doPOST(final String url, V objParam) throws IOException {
|
||||||
final HttpPost httpPost = new HttpPost(url);
|
final HttpPost httpPost = new HttpPost(url);
|
||||||
|
|
||||||
if (objParam != null) {
|
if (objParam != null) {
|
||||||
|
@ -45,12 +48,12 @@ public class DNetRestClient {
|
||||||
return doHTTPRequest(httpPost);
|
return doHTTPRequest(httpPost);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception {
|
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
|
||||||
return mapper.readValue(doPOST(url, objParam), clazz);
|
return mapper.readValue(doPOST(url, objParam), clazz);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String doHTTPRequest(final HttpUriRequest r) throws Exception {
|
private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
|
||||||
CloseableHttpClient client = HttpClients.createDefault();
|
try (CloseableHttpClient client = HttpClients.createDefault()) {
|
||||||
|
|
||||||
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
|
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
|
||||||
log
|
log
|
||||||
|
@ -62,8 +65,8 @@ public class DNetRestClient {
|
||||||
.map(h -> h.getName() + ":" + h.getValue())
|
.map(h -> h.getName() + ":" + h.getValue())
|
||||||
.collect(Collectors.joining(",")));
|
.collect(Collectors.joining(",")));
|
||||||
|
|
||||||
CloseableHttpResponse response = client.execute(r);
|
return IOUtils.toString(client.execute(r).getEntity().getContent());
|
||||||
return IOUtils.toString(response.getEntity().getContent());
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
|
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {
|
||||||
|
|
|
@ -46,7 +46,7 @@ public class Vocabulary implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VocabularyTerm getTerm(final String id) {
|
public VocabularyTerm getTerm(final String id) {
|
||||||
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null);
|
return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void addTerm(final String id, final String name) {
|
protected void addTerm(final String id, final String name) {
|
||||||
|
@ -81,7 +81,6 @@ public class Vocabulary implements Serializable {
|
||||||
.ofNullable(getTermBySynonym(syn))
|
.ofNullable(getTermBySynonym(syn))
|
||||||
.map(term -> getTermAsQualifier(term.getId()))
|
.map(term -> getTermAsQualifier(term.getId()))
|
||||||
.orElse(null);
|
.orElse(null);
|
||||||
// .orElse(OafMapperUtils.unknown(getId(), getName()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
vocs.addTerm(vocId, termId, termName);
|
vocs.addTerm(vocId, termId, termName);
|
||||||
// vocs.addSynonyms(vocId, termId, termId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable {
|
||||||
final String syn = arr[2].trim();
|
final String syn = arr[2].trim();
|
||||||
|
|
||||||
vocs.addSynonyms(vocId, termId, syn);
|
vocs.addSynonyms(vocId, termId, syn);
|
||||||
// vocs.addSynonyms(vocId, termId, termId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable {
|
||||||
.getTerms()
|
.getTerms()
|
||||||
.values()
|
.values()
|
||||||
.stream()
|
.stream()
|
||||||
.map(t -> t.getId())
|
.map(VocabularyTerm::getId)
|
||||||
.collect(Collectors.toCollection(HashSet::new));
|
.collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable {
|
||||||
return Optional
|
return Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(String::toLowerCase)
|
.map(String::toLowerCase)
|
||||||
.map(id -> vocs.containsKey(id))
|
.map(vocs::containsKey)
|
||||||
.orElse(false);
|
.orElse(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
private void addSynonyms(final String vocId, final String termId, final String syn) {
|
||||||
String id = Optional
|
String id = Optional
|
||||||
.ofNullable(vocId)
|
.ofNullable(vocId)
|
||||||
.map(s -> s.toLowerCase())
|
.map(String::toLowerCase)
|
||||||
.orElseThrow(
|
.orElseThrow(
|
||||||
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]")));
|
() -> new IllegalArgumentException(
|
||||||
|
String
|
||||||
|
.format(
|
||||||
|
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
|
||||||
Optional
|
Optional
|
||||||
.ofNullable(vocs.get(id))
|
.ofNullable(vocs.get(id))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
package eu.dnetlib.dhp.message;
|
package eu.dnetlib.dhp.message;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -10,8 +9,8 @@ public class Message implements Serializable {
|
||||||
|
|
||||||
private static final long serialVersionUID = 401753881204524893L;
|
private static final long serialVersionUID = 401753881204524893L;
|
||||||
|
|
||||||
public static String CURRENT_PARAM = "current";
|
public static final String CURRENT_PARAM = "current";
|
||||||
public static String TOTAL_PARAM = "total";
|
public static final String TOTAL_PARAM = "total";
|
||||||
|
|
||||||
private MessageType messageType;
|
private MessageType messageType;
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge;
|
||||||
import java.text.Normalizer;
|
import java.text.Normalizer;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
@ -19,6 +18,9 @@ public class AuthorMerger {
|
||||||
|
|
||||||
private static final Double THRESHOLD = 0.95;
|
private static final Double THRESHOLD = 0.95;
|
||||||
|
|
||||||
|
private AuthorMerger() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Author> merge(List<List<Author>> authors) {
|
public static List<Author> merge(List<List<Author>> authors) {
|
||||||
|
|
||||||
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
|
||||||
|
@ -36,7 +38,8 @@ public class AuthorMerger {
|
||||||
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
|
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
|
||||||
int pa = countAuthorsPids(a);
|
int pa = countAuthorsPids(a);
|
||||||
int pb = countAuthorsPids(b);
|
int pb = countAuthorsPids(b);
|
||||||
List<Author> base, enrich;
|
List<Author> base;
|
||||||
|
List<Author> enrich;
|
||||||
int sa = authorsSize(a);
|
int sa = authorsSize(a);
|
||||||
int sb = authorsSize(b);
|
int sb = authorsSize(b);
|
||||||
|
|
||||||
|
@ -62,7 +65,7 @@ public class AuthorMerger {
|
||||||
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
|
||||||
final Map<String, Author> basePidAuthorMap = base
|
final Map<String, Author> basePidAuthorMap = base
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.flatMap(
|
.flatMap(
|
||||||
a -> a
|
a -> a
|
||||||
.getPid()
|
.getPid()
|
||||||
|
@ -74,7 +77,7 @@ public class AuthorMerger {
|
||||||
// <pid, Author> (list of pid that are missing in the other list)
|
// <pid, Author> (list of pid that are missing in the other list)
|
||||||
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> a.getPid() != null && a.getPid().size() > 0)
|
.filter(a -> a.getPid() != null && !a.getPid().isEmpty())
|
||||||
.flatMap(
|
.flatMap(
|
||||||
a -> a
|
a -> a
|
||||||
.getPid()
|
.getPid()
|
||||||
|
@ -117,9 +120,9 @@ public class AuthorMerger {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String pidToComparableString(StructuredProperty pid) {
|
public static String pidToComparableString(StructuredProperty pid) {
|
||||||
return (pid.getQualifier() != null
|
final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
|
||||||
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : ""
|
: "";
|
||||||
: "")
|
return (pid.getQualifier() != null ? classid : "")
|
||||||
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
|
||||||
/** Created by sandro on 9/29/16. */
|
/** Created by sandro on 9/29/16. */
|
||||||
public class VtdUtilityParser {
|
public class VtdUtilityParser {
|
||||||
|
|
||||||
|
private VtdUtilityParser() {
|
||||||
|
}
|
||||||
|
|
||||||
public static List<Node> getTextValuesWithAttributes(
|
public static List<Node> getTextValuesWithAttributes(
|
||||||
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
|
||||||
throws VtdException {
|
throws VtdException {
|
||||||
|
|
|
@ -284,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
|
||||||
r
|
r
|
||||||
.getAuthor()
|
.getAuthor()
|
||||||
.stream()
|
.stream()
|
||||||
.filter(a -> Objects.nonNull(a))
|
.filter(Objects::nonNull)
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname()))
|
||||||
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
|
||||||
.collect(Collectors.toList()));
|
.collect(Collectors.toList()));
|
||||||
|
|
|
@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*;
|
||||||
|
|
||||||
public class OafMapperUtils {
|
public class OafMapperUtils {
|
||||||
|
|
||||||
|
private OafMapperUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
public static Oaf merge(final Oaf left, final Oaf right) {
|
public static Oaf merge(final Oaf left, final Oaf right) {
|
||||||
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
if (ModelSupport.isSubClass(left, OafEntity.class)) {
|
||||||
return mergeEntities((OafEntity) left, (OafEntity) right);
|
return mergeEntities((OafEntity) left, (OafEntity) right);
|
||||||
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
} else if (ModelSupport.isSubClass(left, Relation.class)) {
|
||||||
((Relation) left).mergeFrom((Relation) right);
|
((Relation) left).mergeFrom((Relation) right);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
|
||||||
}
|
}
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
@ -38,7 +41,7 @@ public class OafMapperUtils {
|
||||||
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
} else if (ModelSupport.isSubClass(left, Project.class)) {
|
||||||
left.mergeFrom(right);
|
left.mergeFrom(right);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
|
||||||
}
|
}
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
@ -62,7 +65,7 @@ public class OafMapperUtils {
|
||||||
|
|
||||||
public static List<KeyValue> listKeyValues(final String... s) {
|
public static List<KeyValue> listKeyValues(final String... s) {
|
||||||
if (s.length % 2 > 0) {
|
if (s.length % 2 > 0) {
|
||||||
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)");
|
throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<KeyValue> list = new ArrayList<>();
|
final List<KeyValue> list = new ArrayList<>();
|
||||||
|
@ -88,7 +91,7 @@ public class OafMapperUtils {
|
||||||
.stream(values)
|
.stream(values)
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(distinctByKey(f -> f.getValue()))
|
.filter(distinctByKey(Field::getValue))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,7 +100,7 @@ public class OafMapperUtils {
|
||||||
.stream()
|
.stream()
|
||||||
.map(v -> field(v, info))
|
.map(v -> field(v, info))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.filter(distinctByKey(f -> f.getValue()))
|
.filter(distinctByKey(Field::getValue))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -342,10 +345,10 @@ public class OafMapperUtils {
|
||||||
if (instanceList != null) {
|
if (instanceList != null) {
|
||||||
final Optional<AccessRight> min = instanceList
|
final Optional<AccessRight> min = instanceList
|
||||||
.stream()
|
.stream()
|
||||||
.map(i -> i.getAccessright())
|
.map(Instance::getAccessright)
|
||||||
.min(new AccessRightComparator<>());
|
.min(new AccessRightComparator<>());
|
||||||
|
|
||||||
final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier();
|
final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
|
||||||
|
|
||||||
if (StringUtils.isBlank(rights.getClassid())) {
|
if (StringUtils.isBlank(rights.getClassid())) {
|
||||||
rights.setClassid(UNKNOWN);
|
rights.setClassid(UNKNOWN);
|
||||||
|
|
|
@ -34,6 +34,9 @@ public class DHPUtils {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
|
||||||
|
|
||||||
|
private DHPUtils() {
|
||||||
|
}
|
||||||
|
|
||||||
public static Seq<String> toSeq(List<String> list) {
|
public static Seq<String> toSeq(List<String> list) {
|
||||||
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
|
||||||
}
|
}
|
||||||
|
@ -44,7 +47,7 @@ public class DHPUtils {
|
||||||
md.update(s.getBytes(StandardCharsets.UTF_8));
|
md.update(s.getBytes(StandardCharsets.UTF_8));
|
||||||
return new String(Hex.encodeHex(md.digest()));
|
return new String(Hex.encodeHex(md.digest()));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
System.err.println("Error creating id");
|
log.error("Error creating id from {}", s);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,33 +56,6 @@ public class DHPUtils {
|
||||||
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String compressString(final String input) {
|
|
||||||
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
||||||
Base64OutputStream b64os = new Base64OutputStream(out)) {
|
|
||||||
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
|
|
||||||
gzip.write(input.getBytes(StandardCharsets.UTF_8));
|
|
||||||
gzip.close();
|
|
||||||
return out.toString();
|
|
||||||
} catch (Throwable e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String decompressString(final String input) {
|
|
||||||
byte[] byteArray = Base64.decodeBase64(input.getBytes());
|
|
||||||
int len;
|
|
||||||
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
|
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
|
|
||||||
byte[] buffer = new byte[1024];
|
|
||||||
while ((len = gis.read(buffer)) != -1) {
|
|
||||||
bos.write(buffer, 0, len);
|
|
||||||
}
|
|
||||||
return bos.toString();
|
|
||||||
} catch (Exception e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getJPathString(final String jsonPath, final String json) {
|
public static String getJPathString(final String jsonPath, final String json) {
|
||||||
try {
|
try {
|
||||||
Object o = JsonPath.read(json, jsonPath);
|
Object o = JsonPath.read(json, jsonPath);
|
||||||
|
|
|
@ -18,13 +18,16 @@ public class ISLookupClientFactory {
|
||||||
private static final int requestTimeout = 60000 * 10;
|
private static final int requestTimeout = 60000 * 10;
|
||||||
private static final int connectTimeout = 60000 * 10;
|
private static final int connectTimeout = 60000 * 10;
|
||||||
|
|
||||||
|
private ISLookupClientFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
public static ISLookUpService getLookUpService(final String isLookupUrl) {
|
||||||
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
return getServiceStub(ISLookUpService.class, isLookupUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
|
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
|
||||||
log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint));
|
log.info("creating {} stub from {}", clazz.getName(), endpoint);
|
||||||
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
|
||||||
jaxWsProxyFactory.setServiceClass(clazz);
|
jaxWsProxyFactory.setServiceClass(clazz);
|
||||||
jaxWsProxyFactory.setAddress(endpoint);
|
jaxWsProxyFactory.setAddress(endpoint);
|
||||||
|
@ -38,12 +41,10 @@ public class ISLookupClientFactory {
|
||||||
|
|
||||||
log
|
log
|
||||||
.info(
|
.info(
|
||||||
String
|
"setting connectTimeout to {}, requestTimeout to {} for service {}",
|
||||||
.format(
|
|
||||||
"setting connectTimeout to %s, requestTimeout to %s for service %s",
|
|
||||||
connectTimeout,
|
connectTimeout,
|
||||||
requestTimeout,
|
requestTimeout,
|
||||||
clazz.getCanonicalName()));
|
clazz.getCanonicalName());
|
||||||
|
|
||||||
policy.setConnectionTimeout(connectTimeout);
|
policy.setConnectionTimeout(connectTimeout);
|
||||||
policy.setReceiveTimeout(requestTimeout);
|
policy.setReceiveTimeout(requestTimeout);
|
||||||
|
|
|
@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
|
||||||
|
|
||||||
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
|
||||||
|
|
||||||
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
|
||||||
|
|
||||||
public abstract String getName();
|
public abstract String getName();
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||||
if (arguments == null | arguments.length == 0) {
|
if (arguments == null || arguments.length == 0) {
|
||||||
return new StringValue("");
|
return new StringValue("");
|
||||||
}
|
}
|
||||||
final Item item = arguments[0].head();
|
final Item item = arguments[0].head();
|
||||||
|
@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
|
||||||
for (String format : dateFormats) {
|
for (String format : dateFormats) {
|
||||||
try {
|
try {
|
||||||
c.setTime(new SimpleDateFormat(format).parse(s));
|
c.setTime(new SimpleDateFormat(format).parse(s));
|
||||||
String year = String.valueOf(c.get(Calendar.YEAR));
|
return String.valueOf(c.get(Calendar.YEAR));
|
||||||
return year;
|
|
||||||
} catch (ParseException e) {
|
} catch (ParseException e) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
|
||||||
if (arguments == null | arguments.length == 0) {
|
if (arguments == null || arguments.length == 0) {
|
||||||
return new StringValue(BLANK);
|
return new StringValue(BLANK);
|
||||||
}
|
}
|
||||||
String s = arguments[0].head().getStringValue();
|
String s = arguments[0].head().getStringValue();
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.utils.saxon;
|
package eu.dnetlib.dhp.utils.saxon;
|
||||||
|
|
||||||
|
import static org.apache.commons.lang3.StringUtils.isNotBlank;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import net.sf.saxon.expr.XPathContext;
|
import net.sf.saxon.expr.XPathContext;
|
||||||
|
@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
|
||||||
final String s1 = getValue(arguments[0]);
|
final String s1 = getValue(arguments[0]);
|
||||||
final String s2 = getValue(arguments[1]);
|
final String s2 = getValue(arguments[1]);
|
||||||
|
|
||||||
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : "");
|
final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
|
||||||
|
return new StringValue(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getValue(final Sequence arg) throws XPathException {
|
private String getValue(final Sequence arg) throws XPathException {
|
||||||
|
|
|
@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
|
||||||
|
|
||||||
public class SaxonTransformerFactory {
|
public class SaxonTransformerFactory {
|
||||||
|
|
||||||
|
private SaxonTransformerFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates the index record transformer from the given XSLT
|
* Creates the index record transformer from the given XSLT
|
||||||
*
|
*
|
||||||
|
|
|
@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class ArgumentApplicationParserTest {
|
class ArgumentApplicationParserTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseParameter() throws Exception {
|
void testParseParameter() throws Exception {
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));
|
||||||
|
|
|
@ -21,13 +21,13 @@ public class HdfsSupportTest {
|
||||||
class Remove {
|
class Remove {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
|
||||||
// when
|
// when
|
||||||
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
HdfsSupport.remove(tempDir.toString(), new Configuration());
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ public class HdfsSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
|
||||||
// given
|
// given
|
||||||
Path file = Files.createTempFile(tempDir, "p", "s");
|
Path file = Files.createTempFile(tempDir, "p", "s");
|
||||||
|
|
||||||
|
@ -52,13 +52,13 @@ public class HdfsSupportTest {
|
||||||
class ListFiles {
|
class ListFiles {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowARuntimeExceptionOnError() {
|
void shouldThrowARuntimeExceptionOnError() {
|
||||||
// when
|
// when
|
||||||
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
|
||||||
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
Path subDir2 = Files.createTempDirectory(tempDir, "list_me");
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class PacePersonTest {
|
class PacePersonTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pacePersonTest1() {
|
void pacePersonTest1() {
|
||||||
|
|
||||||
PacePerson p = new PacePerson("Artini, Michele", false);
|
PacePerson p = new PacePerson("Artini, Michele", false);
|
||||||
assertEquals("Artini", p.getSurnameString());
|
assertEquals("Artini", p.getSurnameString());
|
||||||
|
@ -17,7 +17,7 @@ public class PacePersonTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void pacePersonTest2() {
|
void pacePersonTest2() {
|
||||||
PacePerson p = new PacePerson("Michele G. Artini", false);
|
PacePerson p = new PacePerson("Michele G. Artini", false);
|
||||||
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
|
assertEquals("Artini, Michele G.", p.getNormalisedFullname());
|
||||||
assertEquals("Michele G", p.getNameString());
|
assertEquals("Michele G", p.getNameString());
|
||||||
|
|
|
@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
|
||||||
class RunWithSparkSession {
|
class RunWithSparkSession {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
@SuppressWarnings("unchecked")
|
||||||
|
void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
@SuppressWarnings("unchecked")
|
||||||
|
void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
// given
|
// given
|
||||||
SparkSession spark = mock(SparkSession.class);
|
SparkSession spark = mock(SparkSession.class);
|
||||||
|
|
|
@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
@Disabled
|
@Disabled
|
||||||
public class ZenodoAPIClientTest {
|
class ZenodoAPIClientTest {
|
||||||
|
|
||||||
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
|
||||||
private final String ACCESS_TOKEN = "";
|
private final String ACCESS_TOKEN = "";
|
||||||
|
@ -22,7 +22,7 @@ public class ZenodoAPIClientTest {
|
||||||
private final String depositionId = "674915";
|
private final String depositionId = "674915";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
|
||||||
|
@ -44,7 +44,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewDeposition() throws IOException {
|
void testNewDeposition() throws IOException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
@ -67,7 +67,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
void testNewVersionNewName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
@ -87,7 +87,7 @@ public class ZenodoAPIClientTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
void testNewVersionOldName() throws IOException, MissingConceptDoiException {
|
||||||
|
|
||||||
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
|
||||||
ACCESS_TOKEN);
|
ACCESS_TOKEN);
|
||||||
|
|
|
@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
import eu.dnetlib.pace.util.MapDocumentUtil;
|
import eu.dnetlib.pace.util.MapDocumentUtil;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
|
||||||
public class AuthorMergerTest {
|
class AuthorMergerTest {
|
||||||
|
|
||||||
private String publicationsBasePath;
|
private String publicationsBasePath;
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ public class AuthorMergerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void mergeTest() { // used in the dedup: threshold set to 0.95
|
void mergeTest() { // used in the dedup: threshold set to 0.95
|
||||||
|
|
||||||
for (List<Author> authors1 : authors) {
|
for (List<Author> authors1 : authors) {
|
||||||
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
System.out.println("List " + (authors.indexOf(authors1) + 1));
|
||||||
|
|
|
@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
import me.xuender.unidecode.Unidecode;
|
import me.xuender.unidecode.Unidecode;
|
||||||
|
|
||||||
public class OafMapperUtilsTest {
|
class OafMapperUtilsTest {
|
||||||
|
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
|
||||||
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
|
||||||
|
@ -42,7 +42,7 @@ public class OafMapperUtilsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDateValidation() {
|
void testDateValidation() {
|
||||||
|
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
|
||||||
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
|
||||||
|
@ -147,44 +147,46 @@ public class OafMapperUtilsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDate() {
|
void testDate() {
|
||||||
System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998"));
|
final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
|
||||||
|
assertNotNull(date);
|
||||||
|
System.out.println(date);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMergePubs() throws IOException {
|
void testMergePubs() throws IOException {
|
||||||
Publication p1 = read("publication_1.json", Publication.class);
|
Publication p1 = read("publication_1.json", Publication.class);
|
||||||
Publication p2 = read("publication_2.json", Publication.class);
|
Publication p2 = read("publication_2.json", Publication.class);
|
||||||
Dataset d1 = read("dataset_1.json", Dataset.class);
|
Dataset d1 = read("dataset_1.json", Dataset.class);
|
||||||
Dataset d2 = read("dataset_2.json", Dataset.class);
|
Dataset d2 = read("dataset_2.json", Dataset.class);
|
||||||
|
|
||||||
assertEquals(p1.getCollectedfrom().size(), 1);
|
assertEquals(1, p1.getCollectedfrom().size());
|
||||||
assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID);
|
assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
|
||||||
assertEquals(d2.getCollectedfrom().size(), 1);
|
assertEquals(1, d2.getCollectedfrom().size());
|
||||||
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
assertTrue(
|
assertEquals(
|
||||||
|
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.mergeResults(p1, d2)
|
.mergeResults(p1, d2)
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid()
|
.getClassid());
|
||||||
.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
|
|
||||||
|
|
||||||
assertEquals(p2.getCollectedfrom().size(), 1);
|
assertEquals(1, p2.getCollectedfrom().size());
|
||||||
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
assertEquals(d1.getCollectedfrom().size(), 1);
|
assertEquals(1, d1.getCollectedfrom().size());
|
||||||
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
|
||||||
|
|
||||||
assertTrue(
|
assertEquals(
|
||||||
|
ModelConstants.DATASET_RESULTTYPE_CLASSID,
|
||||||
OafMapperUtils
|
OafMapperUtils
|
||||||
.mergeResults(p2, d1)
|
.mergeResults(p2, d1)
|
||||||
.getResulttype()
|
.getResulttype()
|
||||||
.getClassid()
|
.getClassid());
|
||||||
.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
|
||||||
return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new));
|
return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {
|
||||||
|
|
|
@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class RelationMapperTest {
|
class RelationMapperTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLoadRels() throws Exception {
|
void testLoadRels() throws Exception {
|
||||||
|
|
||||||
RelationMapper relationMapper = RelationMapper.load();
|
RelationMapper relationMapper = RelationMapper.load();
|
||||||
relationMapper.keySet().forEach(System.out::println);
|
relationMapper.keySet().forEach(System.out::println);
|
||||||
|
|
|
@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.commons.lang3.tuple.Triple;
|
import org.apache.commons.lang3.tuple.Triple;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.dom4j.Element;
|
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Iterables;
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
|
||||||
import eu.dnetlib.actionmanager.set.ActionManagerSet;
|
|
||||||
import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes;
|
|
||||||
import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob;
|
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
import scala.Tuple2;
|
|
||||||
|
|
||||||
public class ISClient implements Serializable {
|
public class ISClient implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class);
|
private static final Logger log = LoggerFactory.getLogger(ISClient.class);
|
||||||
|
|
||||||
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
|
||||||
|
|
||||||
private final ISLookUpService isLookup;
|
private final transient ISLookUpService isLookup;
|
||||||
|
|
||||||
public ISClient(String isLookupUrl) {
|
public ISClient(String isLookupUrl) {
|
||||||
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
|
||||||
|
@ -63,7 +60,7 @@ public class ISClient implements Serializable {
|
||||||
.map(
|
.map(
|
||||||
sets -> sets
|
sets -> sets
|
||||||
.stream()
|
.stream()
|
||||||
.map(set -> parseSetInfo(set))
|
.map(ISClient::parseSetInfo)
|
||||||
.filter(t -> ids.contains(t.getLeft()))
|
.filter(t -> ids.contains(t.getLeft()))
|
||||||
.map(t -> buildDirectory(basePath, t))
|
.map(t -> buildDirectory(basePath, t))
|
||||||
.collect(Collectors.toList()))
|
.collect(Collectors.toList()))
|
||||||
|
@ -73,15 +70,17 @@ public class ISClient implements Serializable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Triple<String, String, String> parseSetInfo(String set) {
|
private static Triple<String, String, String> parseSetInfo(String set) {
|
||||||
try {
|
try {
|
||||||
Document doc = new SAXReader().read(new StringReader(set));
|
final SAXReader reader = new SAXReader();
|
||||||
|
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||||
|
Document doc = reader.read(new StringReader(set));
|
||||||
return Triple
|
return Triple
|
||||||
.of(
|
.of(
|
||||||
doc.valueOf("//SET/@id"),
|
doc.valueOf("//SET/@id"),
|
||||||
doc.valueOf("//SET/@directory"),
|
doc.valueOf("//SET/@directory"),
|
||||||
doc.valueOf("//SET/@latest"));
|
doc.valueOf("//SET/@latest"));
|
||||||
} catch (DocumentException e) {
|
} catch (DocumentException | SAXException e) {
|
||||||
throw new IllegalStateException(e);
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -99,7 +98,7 @@ public class ISClient implements Serializable {
|
||||||
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
|
||||||
+ propertyName
|
+ propertyName
|
||||||
+ "']/@value/string()";
|
+ "']/@value/string()";
|
||||||
log.debug("quering for service property: " + q);
|
log.debug("quering for service property: {}", q);
|
||||||
try {
|
try {
|
||||||
final List<String> value = isLookup.quickSearchProfile(q);
|
final List<String> value = isLookup.quickSearchProfile(q);
|
||||||
return Iterables.getOnlyElement(value);
|
return Iterables.getOnlyElement(value);
|
||||||
|
|
|
@ -62,6 +62,7 @@ public class MergeAndGet {
|
||||||
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
|
||||||
if (x.getClass().equals(y.getClass())
|
if (x.getClass().equals(y.getClass())
|
||||||
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {
|
||||||
|
|
|
@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
.orElse(true);
|
.orElse(true);
|
||||||
logger.info("shouldGroupById: {}", shouldGroupById);
|
logger.info("shouldGroupById: {}", shouldGroupById);
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
|
||||||
|
|
||||||
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
|
||||||
|
@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
||||||
return spark
|
return spark
|
||||||
.read()
|
.read()
|
||||||
.parquet(path)
|
.parquet(path)
|
||||||
.map((MapFunction<Row, String>) value -> extractPayload(value), Encoders.STRING())
|
.map((MapFunction<Row, String>) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING())
|
||||||
.map(
|
.map(
|
||||||
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
|
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
|
||||||
Encoders.bean(actionPayloadClazz));
|
Encoders.bean(actionPayloadClazz));
|
||||||
|
|
|
@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest {
|
||||||
private ISClient isClient;
|
private ISClient isClient;
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
|
||||||
// given
|
// given
|
||||||
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
|
||||||
Path outputDir = workingDir.resolve("output");
|
Path outputDir = workingDir.resolve("output");
|
||||||
|
|
|
@ -20,7 +20,7 @@ public class MergeAndGetTest {
|
||||||
class MergeFromAndGetStrategy {
|
class MergeFromAndGetStrategy {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndOaf() {
|
void shouldThrowForOafAndOaf() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -33,7 +33,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndRelation() {
|
void shouldThrowForOafAndRelation() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -46,7 +46,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafAndOafEntity() {
|
void shouldThrowForOafAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Oaf a = mock(Oaf.class);
|
Oaf a = mock(Oaf.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -59,7 +59,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOaf() {
|
void shouldThrowForRelationAndOaf() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -72,7 +72,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOafEntity() {
|
void shouldThrowForRelationAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -85,7 +85,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldBehaveProperlyForRelationAndRelation() {
|
void shouldBehaveProperlyForRelationAndRelation() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -101,7 +101,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOaf() {
|
void shouldThrowForOafEntityAndOaf() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Oaf b = mock(Oaf.class);
|
Oaf b = mock(Oaf.class);
|
||||||
|
@ -114,7 +114,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndRelation() {
|
void shouldThrowForOafEntityAndRelation() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -127,7 +127,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
|
||||||
// given
|
// given
|
||||||
class OafEntitySub1 extends OafEntity {
|
class OafEntitySub1 extends OafEntity {
|
||||||
}
|
}
|
||||||
|
@ -145,7 +145,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldBehaveProperlyForOafEntityAndOafEntity() {
|
void shouldBehaveProperlyForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -165,7 +165,7 @@ public class MergeAndGetTest {
|
||||||
class SelectNewerAndGetStrategy {
|
class SelectNewerAndGetStrategy {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndRelation() {
|
void shouldThrowForOafEntityAndRelation() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Relation b = mock(Relation.class);
|
Relation b = mock(Relation.class);
|
||||||
|
@ -178,7 +178,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForRelationAndOafEntity() {
|
void shouldThrowForRelationAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
Relation a = mock(Relation.class);
|
Relation a = mock(Relation.class);
|
||||||
OafEntity b = mock(OafEntity.class);
|
OafEntity b = mock(OafEntity.class);
|
||||||
|
@ -191,7 +191,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowForOafEntityAndResult() {
|
void shouldThrowForOafEntityAndResult() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
Result b = mock(Result.class);
|
Result b = mock(Result.class);
|
||||||
|
@ -204,7 +204,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
// real types must be used because subclass-superclass resolution does not work for
|
// real types must be used because subclass-superclass resolution does not work for
|
||||||
// mocks
|
// mocks
|
||||||
|
@ -221,7 +221,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldShouldReturnLeftForOafEntityAndOafEntity() {
|
void shouldShouldReturnLeftForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
when(a.getLastupdatetimestamp()).thenReturn(1L);
|
when(a.getLastupdatetimestamp()).thenReturn(1L);
|
||||||
|
@ -238,7 +238,7 @@ public class MergeAndGetTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldShouldReturnRightForOafEntityAndOafEntity() {
|
void shouldShouldReturnRightForOafEntityAndOafEntity() {
|
||||||
// given
|
// given
|
||||||
OafEntity a = mock(OafEntity.class);
|
OafEntity a = mock(OafEntity.class);
|
||||||
when(a.getLastupdatetimestamp()).thenReturn(2L);
|
when(a.getLastupdatetimestamp()).thenReturn(2L);
|
||||||
|
|
|
@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
class Main {
|
class Main {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
|
void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
|
||||||
// given
|
// given
|
||||||
Class<Relation> rowClazz = Relation.class;
|
Class<Relation> rowClazz = Relation.class;
|
||||||
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
Class<OafEntity> actionPayloadClazz = OafEntity.class;
|
||||||
|
@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
|
||||||
|
|
||||||
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
|
||||||
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
|
||||||
public void shouldPromoteActionPayloadForGraphTable(
|
void shouldPromoteActionPayloadForGraphTable(
|
||||||
MergeAndGet.Strategy strategy,
|
MergeAndGet.Strategy strategy,
|
||||||
Class<? extends Oaf> rowClazz,
|
Class<? extends Oaf> rowClazz,
|
||||||
Class<? extends Oaf> actionPayloadClazz)
|
Class<? extends Oaf> actionPayloadClazz)
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
class JoinTableWithActionPayloadAndMerge {
|
class JoinTableWithActionPayloadAndMerge {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
|
||||||
// given
|
// given
|
||||||
class OafImpl extends Oaf {
|
class OafImpl extends Oaf {
|
||||||
}
|
}
|
||||||
|
@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
|
void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
|
||||||
// given
|
// given
|
||||||
String id0 = "id0";
|
String id0 = "id0";
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
|
@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
|
void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
|
||||||
// given
|
// given
|
||||||
String id0 = "id0";
|
String id0 = "id0";
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
|
@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest {
|
||||||
class GroupTableByIdAndMerge {
|
class GroupTableByIdAndMerge {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldRunProperly() {
|
void shouldRunProperly() {
|
||||||
// given
|
// given
|
||||||
String id1 = "id1";
|
String id1 = "id1";
|
||||||
String id2 = "id2";
|
String id2 = "id2";
|
||||||
|
|
|
@ -84,14 +84,6 @@
|
||||||
<artifactId>json</artifactId>
|
<artifactId>json</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.apache.commons</groupId>
|
|
||||||
<artifactId>commons-csv</artifactId>
|
|
||||||
<version>1.8</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
|
|
||||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.poi</groupId>
|
<groupId>org.apache.poi</groupId>
|
||||||
|
|
|
@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result;
|
||||||
public class CollectAndSave implements Serializable {
|
public class CollectAndSave implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
|
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
String jsonConfiguration = IOUtils
|
String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
|
Objects
|
||||||
|
.requireNonNull(
|
||||||
CollectAndSave.class
|
CollectAndSave.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json"));
|
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
|
||||||
|
|
|
@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
|
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
|
||||||
String bipScorePath, Class<I> inputClazz) {
|
String bipScorePath, Class<I> inputClazz) {
|
||||||
|
|
||||||
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
|
||||||
.textFile(bipScorePath)
|
.textFile(bipScorePath)
|
||||||
|
@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
return bs;
|
return bs;
|
||||||
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
|
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
|
||||||
|
|
||||||
System.out.println(bipScores.count());
|
|
||||||
|
|
||||||
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
Dataset<I> results = readPath(spark, inputPath, inputClazz);
|
||||||
|
|
||||||
results.createOrReplaceTempView("result");
|
results.createOrReplaceTempView("result");
|
||||||
|
@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
|
||||||
ret.setId(value._2().getId());
|
ret.setId(value._2().getId());
|
||||||
return ret;
|
return ret;
|
||||||
}, Encoders.bean(BipScore.class))
|
}, Encoders.bean(BipScore.class))
|
||||||
.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING())
|
.groupByKey((MapFunction<BipScore, String>) BipScore::getId, Encoders.STRING())
|
||||||
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
|
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
|
||||||
Result ret = new Result();
|
Result ret = new Result();
|
||||||
ret.setDataInfo(getDataInfo());
|
ret.setDataInfo(getDataInfo());
|
||||||
|
|
|
@ -8,14 +8,15 @@ import org.apache.http.impl.client.{HttpClientBuilder, HttpClients}
|
||||||
|
|
||||||
import java.io.IOException
|
import java.io.IOException
|
||||||
|
|
||||||
abstract class AbstractRestClient extends Iterator[String]{
|
|
||||||
|
abstract class AbstractRestClient extends Iterator[String] {
|
||||||
|
|
||||||
var buffer: List[String] = List()
|
var buffer: List[String] = List()
|
||||||
var current_index:Int = 0
|
var current_index: Int = 0
|
||||||
|
|
||||||
var scroll_value: Option[String] = None
|
var scroll_value: Option[String] = None
|
||||||
|
|
||||||
var complete:Boolean = false
|
var complete: Boolean = false
|
||||||
|
|
||||||
|
|
||||||
def extractInfo(input: String): Unit
|
def extractInfo(input: String): Unit
|
||||||
|
@ -23,13 +24,13 @@ abstract class AbstractRestClient extends Iterator[String]{
|
||||||
protected def getBufferData(): Unit
|
protected def getBufferData(): Unit
|
||||||
|
|
||||||
|
|
||||||
def doHTTPGETRequest(url:String): String = {
|
def doHTTPGETRequest(url: String): String = {
|
||||||
val httpGet = new HttpGet(url)
|
val httpGet = new HttpGet(url)
|
||||||
doHTTPRequest(httpGet)
|
doHTTPRequest(httpGet)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def doHTTPPOSTRequest(url:String, json:String): String = {
|
def doHTTPPOSTRequest(url: String, json: String): String = {
|
||||||
val httpPost = new HttpPost(url)
|
val httpPost = new HttpPost(url)
|
||||||
if (json != null) {
|
if (json != null) {
|
||||||
val entity = new StringEntity(json)
|
val entity = new StringEntity(json)
|
||||||
|
@ -46,7 +47,7 @@ abstract class AbstractRestClient extends Iterator[String]{
|
||||||
|
|
||||||
|
|
||||||
override def next(): String = {
|
override def next(): String = {
|
||||||
val next_item:String = buffer(current_index)
|
val next_item: String = buffer(current_index)
|
||||||
current_index = current_index + 1
|
current_index = current_index + 1
|
||||||
if (current_index == buffer.size)
|
if (current_index == buffer.size)
|
||||||
getBufferData()
|
getBufferData()
|
||||||
|
@ -54,15 +55,14 @@ abstract class AbstractRestClient extends Iterator[String]{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private def doHTTPRequest[A <: HttpUriRequest](r: A): String = {
|
||||||
|
|
||||||
private def doHTTPRequest[A <: HttpUriRequest](r: A) :String ={
|
|
||||||
val timeout = 60; // seconds
|
val timeout = 60; // seconds
|
||||||
val config = RequestConfig.custom()
|
val config = RequestConfig.custom()
|
||||||
.setConnectTimeout(timeout * 1000)
|
.setConnectTimeout(timeout * 1000)
|
||||||
.setConnectionRequestTimeout(timeout * 1000)
|
.setConnectionRequestTimeout(timeout * 1000)
|
||||||
.setSocketTimeout(timeout * 1000).build()
|
.setSocketTimeout(timeout * 1000).build()
|
||||||
val client =HttpClientBuilder.create().setDefaultRequestConfig(config).build()
|
val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build()
|
||||||
|
try {
|
||||||
var tries = 4
|
var tries = 4
|
||||||
while (tries > 0) {
|
while (tries > 0) {
|
||||||
println(s"requesting ${r.getURI}")
|
println(s"requesting ${r.getURI}")
|
||||||
|
@ -78,10 +78,15 @@ abstract class AbstractRestClient extends Iterator[String]{
|
||||||
case e: Throwable =>
|
case e: Throwable =>
|
||||||
println(s"Error on requesting ${r.getURI}")
|
println(s"Error on requesting ${r.getURI}")
|
||||||
e.printStackTrace()
|
e.printStackTrace()
|
||||||
tries-=1
|
tries -= 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
""
|
""
|
||||||
|
} finally {
|
||||||
|
if (client != null)
|
||||||
|
client.close()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
getBufferData()
|
getBufferData()
|
||||||
}
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
package eu.dnetlib.dhp.actionmanager.datacite
|
||||||
|
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
|
||||||
|
import org.apache.hadoop.conf.Configuration
|
||||||
|
import org.apache.hadoop.fs.LocalFileSystem
|
||||||
|
import org.apache.hadoop.hdfs.DistributedFileSystem
|
||||||
|
import org.apache.spark.SparkConf
|
||||||
|
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
|
||||||
|
import org.apache.spark.sql.functions.max
|
||||||
|
import org.slf4j.{Logger, LoggerFactory}
|
||||||
|
|
||||||
|
import java.text.SimpleDateFormat
|
||||||
|
import java.util.{Date, Locale}
|
||||||
|
import scala.io.Source
|
||||||
|
|
||||||
|
object SparkDownloadUpdateDatacite {
|
||||||
|
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||||
|
|
||||||
|
def main(args: Array[String]): Unit = {
|
||||||
|
|
||||||
|
val conf = new SparkConf
|
||||||
|
val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json")).mkString)
|
||||||
|
parser.parseArgument(args)
|
||||||
|
val master = parser.get("master")
|
||||||
|
val sourcePath = parser.get("sourcePath")
|
||||||
|
val workingPath = parser.get("workingPath")
|
||||||
|
|
||||||
|
val hdfsuri = parser.get("namenode")
|
||||||
|
log.info(s"namenode is $hdfsuri")
|
||||||
|
|
||||||
|
|
||||||
|
val spark: SparkSession = SparkSession.builder().config(conf)
|
||||||
|
.appName(getClass.getSimpleName)
|
||||||
|
.master(master)
|
||||||
|
.getOrCreate()
|
||||||
|
|
||||||
|
implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
|
||||||
|
implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result]
|
||||||
|
|
||||||
|
import spark.implicits._
|
||||||
|
|
||||||
|
|
||||||
|
val maxDate:String = spark.read.load(workingPath).as[Oaf].filter(s => s.isInstanceOf[Result]).map(r => r.asInstanceOf[Result].getDateofcollection).select(max("value")).first().getString(0)
|
||||||
|
val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
|
||||||
|
val string_to_date =ISO8601FORMAT.parse(maxDate)
|
||||||
|
val ts = string_to_date.getTime
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -171,26 +171,23 @@ public class PrepareProgramme {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
|
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
|
||||||
if (!a.getLanguage().equals("en")) {
|
if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) {
|
||||||
if (b.getLanguage().equalsIgnoreCase("en")) {
|
|
||||||
a.setTitle(b.getTitle());
|
a.setTitle(b.getTitle());
|
||||||
a.setLanguage(b.getLanguage());
|
a.setLanguage(b.getLanguage());
|
||||||
}
|
}
|
||||||
}
|
if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) {
|
||||||
if (StringUtils.isEmpty(a.getShortTitle())) {
|
|
||||||
if (!StringUtils.isEmpty(b.getShortTitle())) {
|
|
||||||
a.setShortTitle(b.getShortTitle());
|
a.setShortTitle(b.getShortTitle());
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
||||||
Object[] codedescription = h2020Programmes
|
Object[] codedescription = h2020Programmes
|
||||||
.map(
|
.map(
|
||||||
value -> new Tuple2<>(value.getCode(),
|
value -> new Tuple2<>(value.getCode(),
|
||||||
new Tuple2<String, String>(value.getTitle(), value.getShortTitle())))
|
new Tuple2<>(value.getTitle(), value.getShortTitle())))
|
||||||
.collect()
|
.collect()
|
||||||
.toArray();
|
.toArray();
|
||||||
|
|
||||||
|
@ -216,7 +213,7 @@ public class PrepareProgramme {
|
||||||
String[] tmp = ent.split("\\.");
|
String[] tmp = ent.split("\\.");
|
||||||
if (tmp.length <= 2) {
|
if (tmp.length <= 2) {
|
||||||
if (StringUtils.isEmpty(entry._2()._2())) {
|
if (StringUtils.isEmpty(entry._2()._2())) {
|
||||||
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1()));
|
map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1()));
|
||||||
} else {
|
} else {
|
||||||
map.put(entry._1(), entry._2());
|
map.put(entry._1(), entry._2());
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -29,7 +29,7 @@ import scala.Tuple2;
|
||||||
*/
|
*/
|
||||||
public class PrepareProjects {
|
public class PrepareProjects {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class);
|
private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
|
@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient;
|
||||||
*/
|
*/
|
||||||
public class ReadProjectsFromDB implements Closeable {
|
public class ReadProjectsFromDB implements Closeable {
|
||||||
|
|
||||||
private final DbClient dbClient;
|
|
||||||
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
|
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
|
||||||
|
|
||||||
|
private static final String query = "SELECT code " +
|
||||||
|
"from projects where id like 'corda__h2020%' ";
|
||||||
|
|
||||||
|
private final DbClient dbClient;
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
private final BufferedWriter writer;
|
private final BufferedWriter writer;
|
||||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
private final static String query = "SELECT code " +
|
|
||||||
"from projects where id like 'corda__h2020%' ";
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
|
@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) throws Exception {
|
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) {
|
||||||
|
|
||||||
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r));
|
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(this::writeProject);
|
||||||
|
|
||||||
dbClient.processResults(sql, consumer);
|
dbClient.processResults(sql, consumer);
|
||||||
}
|
}
|
||||||
|
@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable {
|
||||||
|
|
||||||
public ReadProjectsFromDB(
|
public ReadProjectsFromDB(
|
||||||
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
|
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
|
||||||
throws Exception {
|
throws IOException {
|
||||||
|
|
||||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||||
this.conf = new Configuration();
|
this.conf = new Configuration();
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
this.conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
FileSystem fileSystem = FileSystem.get(this.conf);
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, false);
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project;
|
||||||
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@ -22,15 +21,16 @@ import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||||
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic;
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||||
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
import eu.dnetlib.dhp.schema.common.ModelSupport;
|
||||||
import eu.dnetlib.dhp.schema.oaf.H2020Classification;
|
import eu.dnetlib.dhp.schema.oaf.H2020Classification;
|
||||||
import eu.dnetlib.dhp.schema.oaf.H2020Programme;
|
import eu.dnetlib.dhp.schema.oaf.H2020Programme;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.OafEntity;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
import scala.Tuple2;
|
import scala.Tuple2;
|
||||||
|
@ -47,13 +47,10 @@ import scala.Tuple2;
|
||||||
*
|
*
|
||||||
* To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more
|
* To produce one single entry for each project code a step of groupoing is needed: each project can be associated to more
|
||||||
* than one programme.
|
* than one programme.
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class SparkAtomicActionJob {
|
public class SparkAtomicActionJob {
|
||||||
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
|
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
|
||||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
private static final HashMap<String, String> programmeMap = new HashMap<>();
|
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
@ -137,7 +134,6 @@ public class SparkAtomicActionJob {
|
||||||
h2020classification.setClassification(csvProgramme.getClassification());
|
h2020classification.setClassification(csvProgramme.getClassification());
|
||||||
h2020classification.setH2020Programme(pm);
|
h2020classification.setH2020Programme(pm);
|
||||||
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
|
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
|
||||||
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
|
|
||||||
pp.setH2020classification(Arrays.asList(h2020classification));
|
pp.setH2020classification(Arrays.asList(h2020classification));
|
||||||
|
|
||||||
return pp;
|
return pp;
|
||||||
|
@ -152,20 +148,16 @@ public class SparkAtomicActionJob {
|
||||||
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
|
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
|
||||||
Optional<EXCELTopic> op = Optional.ofNullable(p._2());
|
Optional<EXCELTopic> op = Optional.ofNullable(p._2());
|
||||||
Project rp = p._1();
|
Project rp = p._1();
|
||||||
if (op.isPresent()) {
|
op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle()));
|
||||||
rp.setH2020topicdescription(op.get().getTitle());
|
|
||||||
}
|
|
||||||
return rp;
|
return rp;
|
||||||
}, Encoders.bean(Project.class))
|
}, Encoders.bean(Project.class))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.groupByKey(
|
.groupByKey(
|
||||||
(MapFunction<Project, String>) p -> p.getId(),
|
(MapFunction<Project, String>) OafEntity::getId,
|
||||||
Encoders.STRING())
|
Encoders.STRING())
|
||||||
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
|
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
|
||||||
Project first = it.next();
|
Project first = it.next();
|
||||||
it.forEachRemaining(p -> {
|
it.forEachRemaining(first::mergeFrom);
|
||||||
first.mergeFrom(p);
|
|
||||||
});
|
|
||||||
return first;
|
return first;
|
||||||
}, Encoders.bean(Project.class))
|
}, Encoders.bean(Project.class))
|
||||||
.toJavaRDD()
|
.toJavaRDD()
|
||||||
|
@ -189,12 +181,6 @@ public class SparkAtomicActionJob {
|
||||||
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
|
|
||||||
// String[] tmp = classification.split(" \\| ");
|
|
||||||
//
|
|
||||||
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
|
|
||||||
// }
|
|
||||||
|
|
||||||
public static <R> Dataset<R> readPath(
|
public static <R> Dataset<R> readPath(
|
||||||
SparkSession spark, String inputPath, Class<R> clazz) {
|
SparkSession spark, String inputPath, Class<R> clazz) {
|
||||||
return spark
|
return spark
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import org.apache.commons.csv.CSVFormat;
|
|
||||||
import org.apache.commons.csv.CSVRecord;
|
|
||||||
import org.apache.commons.lang.reflect.FieldUtils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a generic csv and maps it into classes that mirror its schema
|
|
||||||
*/
|
|
||||||
public class CSVParser {
|
|
||||||
|
|
||||||
public <R> List<R> parse(String csvFile, String classForName)
|
|
||||||
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
|
|
||||||
final CSVFormat format = CSVFormat.EXCEL
|
|
||||||
.withHeader()
|
|
||||||
.withDelimiter(';')
|
|
||||||
.withQuote('"')
|
|
||||||
.withTrim();
|
|
||||||
List<R> ret = new ArrayList<>();
|
|
||||||
final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format);
|
|
||||||
final Set<String> headers = parser.getHeaderMap().keySet();
|
|
||||||
Class<?> clazz = Class.forName(classForName);
|
|
||||||
for (CSVRecord csvRecord : parser.getRecords()) {
|
|
||||||
final Object cc = clazz.newInstance();
|
|
||||||
for (String header : headers) {
|
|
||||||
FieldUtils.writeField(cc, header, csvRecord.get(header), true);
|
|
||||||
|
|
||||||
}
|
|
||||||
ret.add((R) cc);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,200 +0,0 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* the mmodel for the projects csv file
|
|
||||||
*/
|
|
||||||
public class CSVProject implements Serializable {
|
|
||||||
private String rcn;
|
|
||||||
private String id;
|
|
||||||
private String acronym;
|
|
||||||
private String status;
|
|
||||||
private String programme;
|
|
||||||
private String topics;
|
|
||||||
private String frameworkProgramme;
|
|
||||||
private String title;
|
|
||||||
private String startDate;
|
|
||||||
private String endDate;
|
|
||||||
private String projectUrl;
|
|
||||||
private String objective;
|
|
||||||
private String totalCost;
|
|
||||||
private String ecMaxContribution;
|
|
||||||
private String call;
|
|
||||||
private String fundingScheme;
|
|
||||||
private String coordinator;
|
|
||||||
private String coordinatorCountry;
|
|
||||||
private String participants;
|
|
||||||
private String participantCountries;
|
|
||||||
private String subjects;
|
|
||||||
|
|
||||||
public String getRcn() {
|
|
||||||
return rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRcn(String rcn) {
|
|
||||||
this.rcn = rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getId() {
|
|
||||||
return id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setId(String id) {
|
|
||||||
this.id = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAcronym() {
|
|
||||||
return acronym;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setAcronym(String acronym) {
|
|
||||||
this.acronym = acronym;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStatus() {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setStatus(String status) {
|
|
||||||
this.status = status;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getProgramme() {
|
|
||||||
return programme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setProgramme(String programme) {
|
|
||||||
this.programme = programme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTopics() {
|
|
||||||
return topics;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTopics(String topics) {
|
|
||||||
this.topics = topics;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getFrameworkProgramme() {
|
|
||||||
return frameworkProgramme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFrameworkProgramme(String frameworkProgramme) {
|
|
||||||
this.frameworkProgramme = frameworkProgramme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTitle() {
|
|
||||||
return title;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTitle(String title) {
|
|
||||||
this.title = title;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getStartDate() {
|
|
||||||
return startDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setStartDate(String startDate) {
|
|
||||||
this.startDate = startDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getEndDate() {
|
|
||||||
return endDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setEndDate(String endDate) {
|
|
||||||
this.endDate = endDate;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getProjectUrl() {
|
|
||||||
return projectUrl;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setProjectUrl(String projectUrl) {
|
|
||||||
this.projectUrl = projectUrl;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getObjective() {
|
|
||||||
return objective;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setObjective(String objective) {
|
|
||||||
this.objective = objective;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getTotalCost() {
|
|
||||||
return totalCost;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setTotalCost(String totalCost) {
|
|
||||||
this.totalCost = totalCost;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getEcMaxContribution() {
|
|
||||||
return ecMaxContribution;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setEcMaxContribution(String ecMaxContribution) {
|
|
||||||
this.ecMaxContribution = ecMaxContribution;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCall() {
|
|
||||||
return call;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCall(String call) {
|
|
||||||
this.call = call;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getFundingScheme() {
|
|
||||||
return fundingScheme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setFundingScheme(String fundingScheme) {
|
|
||||||
this.fundingScheme = fundingScheme;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCoordinator() {
|
|
||||||
return coordinator;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCoordinator(String coordinator) {
|
|
||||||
this.coordinator = coordinator;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCoordinatorCountry() {
|
|
||||||
return coordinatorCountry;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setCoordinatorCountry(String coordinatorCountry) {
|
|
||||||
this.coordinatorCountry = coordinatorCountry;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParticipants() {
|
|
||||||
return participants;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setParticipants(String participants) {
|
|
||||||
this.participants = participants;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getParticipantCountries() {
|
|
||||||
return participantCountries;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setParticipantCountries(String participantCountries) {
|
|
||||||
this.participantCountries = participantCountries;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getSubjects() {
|
|
||||||
return subjects;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setSubjects(String subjects) {
|
|
||||||
this.subjects = subjects;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a generic excel file and maps it into classes that mirror its schema
|
* Reads a generic excel file and maps it into classes that mirror its schema
|
||||||
*/
|
*/
|
||||||
|
@ -26,13 +28,12 @@ public class EXCELParser {
|
||||||
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
|
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
|
||||||
InvalidFormatException {
|
InvalidFormatException {
|
||||||
|
|
||||||
OPCPackage pkg = OPCPackage.open(file);
|
try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) {
|
||||||
XSSFWorkbook wb = new XSSFWorkbook(pkg);
|
|
||||||
|
|
||||||
XSSFSheet sheet = wb.getSheet(sheetName);
|
XSSFSheet sheet = wb.getSheet(sheetName);
|
||||||
|
|
||||||
if (sheetName == null) {
|
if (sheet == null) {
|
||||||
throw new RuntimeException("Sheet name " + sheetName + " not present in current file");
|
throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file");
|
||||||
}
|
}
|
||||||
|
|
||||||
List<R> ret = new ArrayList<>();
|
List<R> ret = new ArrayList<>();
|
||||||
|
@ -73,5 +74,6 @@ public class EXCELParser {
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,34 +1,21 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
package eu.dnetlib.dhp.actionmanager.project.utils;
|
||||||
|
|
||||||
import java.io.BufferedWriter;
|
import java.io.*;
|
||||||
import java.io.Closeable;
|
import java.util.Optional;
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.OutputStreamWriter;
|
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.HttpConnector2;
|
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
|
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
|
||||||
*/
|
*/
|
||||||
public class ReadCSV implements Closeable {
|
public class ReadCSV {
|
||||||
private static final Log log = LogFactory.getLog(ReadCSV.class);
|
|
||||||
private final Configuration conf;
|
|
||||||
private final BufferedWriter writer;
|
|
||||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
|
||||||
private final String csvFile;
|
|
||||||
|
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
|
@ -44,56 +31,22 @@ public class ReadCSV implements Closeable {
|
||||||
final String hdfsPath = parser.get("hdfsPath");
|
final String hdfsPath = parser.get("hdfsPath");
|
||||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||||
final String classForName = parser.get("classForName");
|
final String classForName = parser.get("classForName");
|
||||||
|
Optional<String> delimiter = Optional.ofNullable(parser.get("delimiter"));
|
||||||
|
char del = ';';
|
||||||
|
if (delimiter.isPresent())
|
||||||
|
del = delimiter.get().charAt(0);
|
||||||
|
|
||||||
try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) {
|
Configuration conf = new Configuration();
|
||||||
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
|
|
||||||
log.info("Getting CSV file...");
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
readCSV.execute(classForName);
|
BufferedReader reader = new BufferedReader(
|
||||||
|
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
|
||||||
|
|
||||||
}
|
GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);
|
||||||
}
|
|
||||||
|
|
||||||
public void execute(final String classForName) throws Exception {
|
reader.close();
|
||||||
CSVParser csvParser = new CSVParser();
|
|
||||||
csvParser
|
|
||||||
.parse(csvFile, classForName)
|
|
||||||
.stream()
|
|
||||||
.forEach(p -> write(p));
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
writer.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public ReadCSV(
|
|
||||||
final String hdfsPath,
|
|
||||||
final String hdfsNameNode,
|
|
||||||
final String fileURL)
|
|
||||||
throws Exception {
|
|
||||||
this.conf = new Configuration();
|
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
|
||||||
HttpConnector2 httpConnector = new HttpConnector2();
|
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
|
||||||
fileSystem.delete(hdfsWritePath, false);
|
|
||||||
}
|
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
|
||||||
|
|
||||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
|
||||||
this.csvFile = httpConnector.getInputSource(fileURL);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void write(final Object p) {
|
|
||||||
try {
|
|
||||||
writer.write(OBJECT_MAPPER.writeValueAsString(p));
|
|
||||||
writer.newLine();
|
|
||||||
} catch (final Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
import eu.dnetlib.dhp.collection.HttpConnector2;
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
|
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
|
||||||
*/
|
*/
|
||||||
public class ReadExcel implements Closeable {
|
public class ReadExcel implements Closeable {
|
||||||
private static final Log log = LogFactory.getLog(ReadCSV.class);
|
private static final Log log = LogFactory.getLog(ReadExcel.class);
|
||||||
private final Configuration conf;
|
|
||||||
private final BufferedWriter writer;
|
private final BufferedWriter writer;
|
||||||
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
private final InputStream excelFile;
|
private final InputStream excelFile;
|
||||||
|
@ -31,7 +33,7 @@ public class ReadExcel implements Closeable {
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
ReadCSV.class
|
ReadExcel.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream(
|
||||||
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
|
"/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
|
||||||
|
|
||||||
|
@ -51,13 +53,15 @@ public class ReadExcel implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void execute(final String classForName, final String sheetName) throws Exception {
|
public void execute(final String classForName, final String sheetName)
|
||||||
|
throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException,
|
||||||
|
InstantiationException {
|
||||||
|
|
||||||
EXCELParser excelParser = new EXCELParser();
|
EXCELParser excelParser = new EXCELParser();
|
||||||
excelParser
|
excelParser
|
||||||
.parse(excelFile, classForName, sheetName)
|
.parse(excelFile, classForName, sheetName)
|
||||||
.stream()
|
.stream()
|
||||||
.forEach(p -> write(p));
|
.forEach(this::write);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -68,20 +72,20 @@ public class ReadExcel implements Closeable {
|
||||||
public ReadExcel(
|
public ReadExcel(
|
||||||
final String hdfsPath,
|
final String hdfsPath,
|
||||||
final String hdfsNameNode,
|
final String hdfsNameNode,
|
||||||
final String fileURL)
|
final String fileURL) throws CollectorException, IOException {
|
||||||
throws Exception {
|
|
||||||
this.conf = new Configuration();
|
final Configuration conf = new Configuration();
|
||||||
this.conf.set("fs.defaultFS", hdfsNameNode);
|
conf.set("fs.defaultFS", hdfsNameNode);
|
||||||
HttpConnector2 httpConnector = new HttpConnector2();
|
HttpConnector2 httpConnector = new HttpConnector2();
|
||||||
FileSystem fileSystem = FileSystem.get(this.conf);
|
FileSystem fileSystem = FileSystem.get(conf);
|
||||||
Path hdfsWritePath = new Path(hdfsPath);
|
Path hdfsWritePath = new Path(hdfsPath);
|
||||||
FSDataOutputStream fsDataOutputStream = null;
|
|
||||||
if (fileSystem.exists(hdfsWritePath)) {
|
if (fileSystem.exists(hdfsWritePath)) {
|
||||||
fileSystem.delete(hdfsWritePath, false);
|
fileSystem.delete(hdfsWritePath, false);
|
||||||
}
|
}
|
||||||
fsDataOutputStream = fileSystem.create(hdfsWritePath);
|
FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
|
||||||
|
|
||||||
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
|
this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
|
||||||
this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
|
this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,20 +1,32 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
package eu.dnetlib.dhp.actionmanager.project.utils.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.opencsv.bean.CsvBindByName;
|
||||||
|
import com.opencsv.bean.CsvIgnore;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The model for the programme csv file
|
* The model for the programme csv file
|
||||||
*/
|
*/
|
||||||
public class CSVProgramme implements Serializable {
|
public class CSVProgramme implements Serializable {
|
||||||
|
|
||||||
private String rcn;
|
@CsvBindByName(column = "code")
|
||||||
private String code;
|
private String code;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "title")
|
||||||
private String title;
|
private String title;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "shortTitle")
|
||||||
private String shortTitle;
|
private String shortTitle;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "language")
|
||||||
private String language;
|
private String language;
|
||||||
|
|
||||||
|
@CsvIgnore
|
||||||
private String classification;
|
private String classification;
|
||||||
|
|
||||||
|
@CsvIgnore
|
||||||
private String classification_short;
|
private String classification_short;
|
||||||
|
|
||||||
public String getClassification_short() {
|
public String getClassification_short() {
|
||||||
|
@ -33,14 +45,6 @@ public class CSVProgramme implements Serializable {
|
||||||
this.classification = classification;
|
this.classification = classification;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getRcn() {
|
|
||||||
return rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setRcn(String rcn) {
|
|
||||||
this.rcn = rcn;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCode() {
|
public String getCode() {
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
@ -73,5 +77,4 @@ public class CSVProgramme implements Serializable {
|
||||||
this.language = language;
|
this.language = language;
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
}
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.actionmanager.project.utils.model;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
import com.opencsv.bean.CsvBindByName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the mmodel for the projects csv file
|
||||||
|
*/
|
||||||
|
public class CSVProject implements Serializable {
|
||||||
|
|
||||||
|
@CsvBindByName(column = "id")
|
||||||
|
private String id;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "programme")
|
||||||
|
private String programme;
|
||||||
|
|
||||||
|
@CsvBindByName(column = "topics")
|
||||||
|
private String topics;
|
||||||
|
|
||||||
|
public String getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setId(String id) {
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProgramme() {
|
||||||
|
return programme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProgramme(String programme) {
|
||||||
|
this.programme = programme;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTopics() {
|
||||||
|
return topics;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setTopics(String topics) {
|
||||||
|
this.topics = topics;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.actionmanager.project.utils;
|
package eu.dnetlib.dhp.actionmanager.project.utils.model;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
|
@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
|
||||||
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -74,7 +75,7 @@ public class GenerateRorActionSetJob {
|
||||||
|
|
||||||
final String jsonConfiguration = IOUtils
|
final String jsonConfiguration = IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
SparkAtomicActionJob.class
|
GenerateRorActionSetJob.class
|
||||||
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
|
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
|
||||||
|
|
||||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
|
||||||
|
@ -108,7 +109,7 @@ public class GenerateRorActionSetJob {
|
||||||
|
|
||||||
private static void processRorOrganizations(final SparkSession spark,
|
private static void processRorOrganizations(final SparkSession spark,
|
||||||
final String inputPath,
|
final String inputPath,
|
||||||
final String outputPath) throws Exception {
|
final String outputPath) throws IOException {
|
||||||
|
|
||||||
readInputPath(spark, inputPath)
|
readInputPath(spark, inputPath)
|
||||||
.map(
|
.map(
|
||||||
|
@ -203,7 +204,7 @@ public class GenerateRorActionSetJob {
|
||||||
|
|
||||||
private static Dataset<RorOrganization> readInputPath(
|
private static Dataset<RorOrganization> readInputPath(
|
||||||
final SparkSession spark,
|
final SparkSession spark,
|
||||||
final String path) throws Exception {
|
final String path) throws IOException {
|
||||||
|
|
||||||
try (final FileSystem fileSystem = FileSystem.get(new Configuration());
|
try (final FileSystem fileSystem = FileSystem.get(new Configuration());
|
||||||
final InputStream is = fileSystem.open(new Path(path))) {
|
final InputStream is = fileSystem.open(new Path(path))) {
|
||||||
|
|
|
@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
public class Address implements Serializable {
|
public class Address implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 2444635485253443195L;
|
||||||
|
|
||||||
@JsonProperty("lat")
|
@JsonProperty("lat")
|
||||||
private Float lat;
|
private Float lat;
|
||||||
|
|
||||||
|
@ -37,8 +39,6 @@ public class Address implements Serializable {
|
||||||
@JsonProperty("line")
|
@JsonProperty("line")
|
||||||
private String line;
|
private String line;
|
||||||
|
|
||||||
private final static long serialVersionUID = 2444635485253443195L;
|
|
||||||
|
|
||||||
public Float getLat() {
|
public Float getLat() {
|
||||||
return lat;
|
return lat;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
public class Country implements Serializable {
|
public class Country implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 4357848706229493627L;
|
||||||
|
|
||||||
@JsonProperty("country_code")
|
@JsonProperty("country_code")
|
||||||
private String countryCode;
|
private String countryCode;
|
||||||
|
|
||||||
@JsonProperty("country_name")
|
@JsonProperty("country_name")
|
||||||
private String countryName;
|
private String countryName;
|
||||||
|
|
||||||
private final static long serialVersionUID = 4357848706229493627L;
|
|
||||||
|
|
||||||
public String getCountryCode() {
|
public String getCountryCode() {
|
||||||
return countryCode;
|
return countryCode;
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable {
|
||||||
|
|
||||||
private String preferred;
|
private String preferred;
|
||||||
|
|
||||||
private final static long serialVersionUID = 2616688352998387611L;
|
private static final long serialVersionUID = 2616688352998387611L;
|
||||||
|
|
||||||
public ExternalIdType() {
|
public ExternalIdType() {
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode;
|
||||||
public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> {
|
public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt)
|
public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException {
|
||||||
throws IOException, JsonProcessingException {
|
|
||||||
final ObjectCodec oc = p.getCodec();
|
final ObjectCodec oc = p.getCodec();
|
||||||
final JsonNode node = oc.readTree(p);
|
final JsonNode node = oc.readTree(p);
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable {
|
||||||
@JsonProperty("code")
|
@JsonProperty("code")
|
||||||
private String code;
|
private String code;
|
||||||
|
|
||||||
private final static long serialVersionUID = 7294958526269195673L;
|
private static final long serialVersionUID = 7294958526269195673L;
|
||||||
|
|
||||||
public String getAsciiName() {
|
public String getAsciiName() {
|
||||||
return asciiName;
|
return asciiName;
|
||||||
|
|
|
@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable {
|
||||||
@JsonProperty("license")
|
@JsonProperty("license")
|
||||||
private License license;
|
private License license;
|
||||||
|
|
||||||
private final static long serialVersionUID = -8389480201526252955L;
|
private static final long serialVersionUID = -8389480201526252955L;
|
||||||
|
|
||||||
public NameAndCode getNutsLevel2() {
|
public NameAndCode getNutsLevel2() {
|
||||||
return nutsLevel2;
|
return nutsLevel2;
|
||||||
|
|
|
@ -13,7 +13,7 @@ public class Label implements Serializable {
|
||||||
@JsonProperty("label")
|
@JsonProperty("label")
|
||||||
private String label;
|
private String label;
|
||||||
|
|
||||||
private final static long serialVersionUID = -6576156103297850809L;
|
private static final long serialVersionUID = -6576156103297850809L;
|
||||||
|
|
||||||
public String getIso639() {
|
public String getIso639() {
|
||||||
return iso639;
|
return iso639;
|
||||||
|
|
|
@ -13,7 +13,7 @@ public class License implements Serializable {
|
||||||
@JsonProperty("license")
|
@JsonProperty("license")
|
||||||
private String license;
|
private String license;
|
||||||
|
|
||||||
private final static long serialVersionUID = -194308261058176439L;
|
private static final long serialVersionUID = -194308261058176439L;
|
||||||
|
|
||||||
public String getAttribution() {
|
public String getAttribution() {
|
||||||
return attribution;
|
return attribution;
|
||||||
|
|
|
@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
public class NameAndCode implements Serializable {
|
public class NameAndCode implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 5459836979206140843L;
|
||||||
|
|
||||||
@JsonProperty("name")
|
@JsonProperty("name")
|
||||||
private String name;
|
private String name;
|
||||||
|
|
||||||
@JsonProperty("code")
|
@JsonProperty("code")
|
||||||
private String code;
|
private String code;
|
||||||
|
|
||||||
private final static long serialVersionUID = 5459836979206140843L;
|
|
||||||
|
|
||||||
public String getName() {
|
public String getName() {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
public class Relationship implements Serializable {
|
public class Relationship implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = 7847399503395576960L;
|
||||||
|
|
||||||
@JsonProperty("type")
|
@JsonProperty("type")
|
||||||
private String type;
|
private String type;
|
||||||
|
|
||||||
|
@ -16,8 +18,6 @@ public class Relationship implements Serializable {
|
||||||
@JsonProperty("label")
|
@JsonProperty("label")
|
||||||
private String label;
|
private String label;
|
||||||
|
|
||||||
private final static long serialVersionUID = 7847399503395576960L;
|
|
||||||
|
|
||||||
public String getType() {
|
public String getType() {
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
|
|
||||||
public class RorOrganization implements Serializable {
|
public class RorOrganization implements Serializable {
|
||||||
|
|
||||||
|
private static final long serialVersionUID = -2658312087616043225L;
|
||||||
|
|
||||||
@JsonProperty("ip_addresses")
|
@JsonProperty("ip_addresses")
|
||||||
private List<String> ipAddresses = new ArrayList<>();
|
private List<String> ipAddresses = new ArrayList<>();
|
||||||
|
|
||||||
|
@ -59,8 +61,6 @@ public class RorOrganization implements Serializable {
|
||||||
@JsonProperty("status")
|
@JsonProperty("status")
|
||||||
private String status;
|
private String status;
|
||||||
|
|
||||||
private final static long serialVersionUID = -2658312087616043225L;
|
|
||||||
|
|
||||||
public List<String> getIpAddresses() {
|
public List<String> getIpAddresses() {
|
||||||
return ipAddresses;
|
return ipAddresses;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,8 @@ import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
import java.util.concurrent.ScheduledExecutorService;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
|
||||||
public abstract class ReportingJob {
|
public abstract class ReportingJob {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -22,7 +24,7 @@ public abstract class ReportingJob {
|
||||||
|
|
||||||
protected final AggregatorReport report;
|
protected final AggregatorReport report;
|
||||||
|
|
||||||
public ReportingJob(AggregatorReport report) {
|
protected ReportingJob(AggregatorReport report) {
|
||||||
this.report = report;
|
this.report = report;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ public class MDStoreActionNode {
|
||||||
NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
|
NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
|
public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
|
||||||
|
|
||||||
public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
|
public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
|
||||||
public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
|
public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
|
||||||
|
@ -70,7 +70,7 @@ public class MDStoreActionNode {
|
||||||
if (StringUtils.isBlank(hdfsuri)) {
|
if (StringUtils.isBlank(hdfsuri)) {
|
||||||
throw new IllegalArgumentException("missing or empty argument namenode");
|
throw new IllegalArgumentException("missing or empty argument namenode");
|
||||||
}
|
}
|
||||||
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion");
|
final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
|
||||||
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
|
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
|
||||||
|
|
||||||
if (StringUtils.isBlank(mdStoreVersion.getId())) {
|
if (StringUtils.isBlank(mdStoreVersion.getId())) {
|
||||||
|
@ -94,7 +94,7 @@ public class MDStoreActionNode {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case ROLLBACK: {
|
case ROLLBACK: {
|
||||||
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion");
|
final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
|
||||||
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
|
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
|
||||||
|
|
||||||
if (StringUtils.isBlank(mdStoreVersion.getId())) {
|
if (StringUtils.isBlank(mdStoreVersion.getId())) {
|
||||||
|
|
|
@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
|
import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
|
||||||
import eu.dnetlib.dhp.aggregation.common.ReportingJob;
|
import eu.dnetlib.dhp.aggregation.common.ReportingJob;
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
|
@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
|
||||||
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
|
||||||
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
|
||||||
import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
|
||||||
|
|
||||||
public class CollectorWorker extends ReportingJob {
|
public class CollectorWorker extends ReportingJob {
|
||||||
|
@ -116,7 +118,7 @@ public class CollectorWorker extends ReportingJob {
|
||||||
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
|
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
|
||||||
.ofNullable(api.getParams().get("other_plugin_type"))
|
.ofNullable(api.getParams().get("other_plugin_type"))
|
||||||
.map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
|
.map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
|
||||||
.get();
|
.orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
|
||||||
|
|
||||||
switch (plugin) {
|
switch (plugin) {
|
||||||
case mdstore_mongodb_dump:
|
case mdstore_mongodb_dump:
|
||||||
|
|
|
@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
import eu.dnetlib.dhp.message.MessageSender;
|
import eu.dnetlib.dhp.message.MessageSender;
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
|
||||||
|
|
||||||
|
|
|
@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob {
|
||||||
totalItems.add(1);
|
totalItems.add(1);
|
||||||
try {
|
try {
|
||||||
SAXReader reader = new SAXReader();
|
SAXReader reader = new SAXReader();
|
||||||
|
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||||
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
|
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
|
||||||
Node node = document.selectSingleNode(xpath);
|
Node node = document.selectSingleNode(xpath);
|
||||||
final String originalIdentifier = node.getText();
|
final String originalIdentifier = node.getText();
|
||||||
|
|
|
@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
|
||||||
|
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
|
||||||
public interface CollectorPlugin {
|
public interface CollectorPlugin {
|
||||||
|
|
||||||
|
|
|
@ -11,15 +11,13 @@ import org.bson.Document;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.mongodb.MongoClient;
|
|
||||||
import com.mongodb.MongoClientURI;
|
|
||||||
import com.mongodb.client.MongoCollection;
|
import com.mongodb.client.MongoCollection;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
import eu.dnetlib.dhp.common.MdstoreClient;
|
import eu.dnetlib.dhp.common.MdstoreClient;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
|
||||||
public class MDStoreCollectorPlugin implements CollectorPlugin {
|
public class MDStoreCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
|
|
|
@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||||
|
|
||||||
public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
|
public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
|
||||||
|
@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
|
||||||
public static final String PATH_PARAM = "path";
|
public static final String PATH_PARAM = "path";
|
||||||
public static final String BODY_JSONPATH = "$.body";
|
public static final String BODY_JSONPATH = "$.body";
|
||||||
|
|
||||||
public FileSystem fileSystem;
|
private final FileSystem fileSystem;
|
||||||
|
|
||||||
public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
|
public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
|
||||||
this.fileSystem = fileSystem;
|
this.fileSystem = fileSystem;
|
||||||
|
|
|
@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
|
||||||
import com.google.common.collect.Iterators;
|
import com.google.common.collect.Iterators;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.HttpClientParams;
|
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
|
|
||||||
public class OaiCollectorPlugin implements CollectorPlugin {
|
public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
|
@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
|
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
|
||||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate);
|
throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
|
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
|
||||||
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate);
|
throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
final Iterator<Iterator<String>> iters = sets
|
final Iterator<Iterator<String>> iters = sets
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
package eu.dnetlib.dhp.collection.plugin.oai;
|
package eu.dnetlib.dhp.collection.plugin.oai;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
|
@ -16,21 +15,21 @@ import org.dom4j.DocumentException;
|
||||||
import org.dom4j.DocumentHelper;
|
import org.dom4j.DocumentHelper;
|
||||||
import org.dom4j.Node;
|
import org.dom4j.Node;
|
||||||
import org.dom4j.io.OutputFormat;
|
import org.dom4j.io.OutputFormat;
|
||||||
import org.dom4j.io.SAXReader;
|
|
||||||
import org.dom4j.io.XMLWriter;
|
import org.dom4j.io.XMLWriter;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.HttpConnector2;
|
|
||||||
import eu.dnetlib.dhp.collection.XmlCleaner;
|
import eu.dnetlib.dhp.collection.XmlCleaner;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
public class OaiIterator implements Iterator<String> {
|
public class OaiIterator implements Iterator<String> {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
|
private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
|
||||||
|
|
||||||
private final static String REPORT_PREFIX = "oai:";
|
private static final String REPORT_PREFIX = "oai:";
|
||||||
|
public static final String UTF_8 = "UTF-8";
|
||||||
|
|
||||||
private final Queue<String> queue = new PriorityBlockingQueue<>();
|
private final Queue<String> queue = new PriorityBlockingQueue<>();
|
||||||
|
|
||||||
|
@ -68,7 +67,7 @@ public class OaiIterator implements Iterator<String> {
|
||||||
try {
|
try {
|
||||||
this.token = firstPage();
|
this.token = firstPage();
|
||||||
} catch (final CollectorException e) {
|
} catch (final CollectorException e) {
|
||||||
throw new RuntimeException(e);
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -90,7 +89,7 @@ public class OaiIterator implements Iterator<String> {
|
||||||
try {
|
try {
|
||||||
token = otherPages(token);
|
token = otherPages(token);
|
||||||
} catch (final CollectorException e) {
|
} catch (final CollectorException e) {
|
||||||
throw new RuntimeException(e);
|
throw new IllegalStateException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
@ -99,23 +98,24 @@ public class OaiIterator implements Iterator<String> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void remove() {
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String firstPage() throws CollectorException {
|
private String firstPage() throws CollectorException {
|
||||||
try {
|
try {
|
||||||
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8");
|
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8);
|
||||||
if (set != null && !set.isEmpty()) {
|
if (set != null && !set.isEmpty()) {
|
||||||
url += "&set=" + URLEncoder.encode(set, "UTF-8");
|
url += "&set=" + URLEncoder.encode(set, UTF_8);
|
||||||
}
|
}
|
||||||
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||||
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||||
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
url += "&from=" + URLEncoder.encode(fromDate, UTF_8);
|
||||||
}
|
}
|
||||||
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|
||||||
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
|
||||||
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
url += "&until=" + URLEncoder.encode(untilDate, UTF_8);
|
||||||
}
|
}
|
||||||
log.info("Start harvesting using url: " + url);
|
log.info("Start harvesting using url: {}", url);
|
||||||
|
|
||||||
return downloadPage(url);
|
return downloadPage(url);
|
||||||
} catch (final UnsupportedEncodingException e) {
|
} catch (final UnsupportedEncodingException e) {
|
||||||
|
@ -143,7 +143,7 @@ public class OaiIterator implements Iterator<String> {
|
||||||
return downloadPage(
|
return downloadPage(
|
||||||
baseUrl
|
baseUrl
|
||||||
+ "?verb=ListRecords&resumptionToken="
|
+ "?verb=ListRecords&resumptionToken="
|
||||||
+ URLEncoder.encode(resumptionToken, "UTF-8"));
|
+ URLEncoder.encode(resumptionToken, UTF_8));
|
||||||
} catch (final UnsupportedEncodingException e) {
|
} catch (final UnsupportedEncodingException e) {
|
||||||
report.put(e.getClass().getName(), e.getMessage());
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
throw new CollectorException(e);
|
throw new CollectorException(e);
|
||||||
|
@ -161,7 +161,7 @@ public class OaiIterator implements Iterator<String> {
|
||||||
report.put(e.getClass().getName(), e.getMessage());
|
report.put(e.getClass().getName(), e.getMessage());
|
||||||
final String cleaned = XmlCleaner.cleanAllEntities(xml);
|
final String cleaned = XmlCleaner.cleanAllEntities(xml);
|
||||||
try {
|
try {
|
||||||
doc = DocumentHelper.parseText(xml);
|
doc = DocumentHelper.parseText(cleaned);
|
||||||
} catch (final DocumentException e1) {
|
} catch (final DocumentException e1) {
|
||||||
final String resumptionToken = extractResumptionToken(xml);
|
final String resumptionToken = extractResumptionToken(xml);
|
||||||
if (resumptionToken == null) {
|
if (resumptionToken == null) {
|
||||||
|
|
|
@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai;
|
||||||
|
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
import eu.dnetlib.dhp.collection.HttpClientParams;
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
import eu.dnetlib.dhp.collection.HttpConnector2;
|
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||||
|
|
||||||
public class OaiIteratorFactory {
|
public class OaiIteratorFactory {
|
||||||
|
|
||||||
|
|
|
@ -9,11 +9,11 @@ import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.HttpClientParams;
|
|
||||||
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: delegate HTTP requests to the common HttpConnector2 implementation.
|
* TODO: delegate HTTP requests to the common HttpConnector2 implementation.
|
||||||
|
|
|
@ -30,9 +30,9 @@ import org.w3c.dom.Node;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
import org.xml.sax.InputSource;
|
import org.xml.sax.InputSource;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.collection.CollectorException;
|
|
||||||
import eu.dnetlib.dhp.collection.HttpClientParams;
|
|
||||||
import eu.dnetlib.dhp.collection.JsonUtils;
|
import eu.dnetlib.dhp.collection.JsonUtils;
|
||||||
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||||
|
import eu.dnetlib.dhp.common.collection.HttpClientParams;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* log.info(...) equal to log.trace(...) in the application-logs
|
* log.info(...) equal to log.trace(...) in the application-logs
|
||||||
|
@ -131,7 +131,8 @@ public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
|
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
|
||||||
throws TransformerConfigurationException, XPathExpressionException {
|
throws TransformerConfigurationException, XPathExpressionException {
|
||||||
transformer = TransformerFactory.newInstance().newTransformer();
|
final TransformerFactory factory = TransformerFactory.newInstance();
|
||||||
|
transformer = factory.newTransformer();
|
||||||
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
|
||||||
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
|
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
|
||||||
xpath = XPathFactory.newInstance().newXPath();
|
xpath = XPathFactory.newInstance().newXPath();
|
||||||
|
@ -142,7 +143,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
private void initQueue() {
|
private void initQueue() {
|
||||||
query = baseUrl + "?" + queryParams + querySize + queryFormat;
|
query = baseUrl + "?" + queryParams + querySize + queryFormat;
|
||||||
log.info("REST calls starting with " + query);
|
log.info("REST calls starting with {}", query);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void disconnect() {
|
private void disconnect() {
|
||||||
|
@ -174,7 +175,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
try {
|
try {
|
||||||
query = downloadPage(query);
|
query = downloadPage(query);
|
||||||
} catch (CollectorException e) {
|
} catch (CollectorException e) {
|
||||||
log.debug("CollectorPlugin.next()-Exception: " + e);
|
log.debug("CollectorPlugin.next()-Exception: {}", e);
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -198,7 +199,7 @@ public class RestIterator implements Iterator<String> {
|
||||||
|
|
||||||
// check if cursor=* is initial set otherwise add it to the queryParam URL
|
// check if cursor=* is initial set otherwise add it to the queryParam URL
|
||||||
if (resumptionType.equalsIgnoreCase("deep-cursor")) {
|
if (resumptionType.equalsIgnoreCase("deep-cursor")) {
|
||||||
log.debug("check resumptionType deep-cursor and check cursor=*?" + query);
|
log.debug("check resumptionType deep-cursor and check cursor=*?{}", query);
|
||||||
if (!query.contains("&cursor=")) {
|
if (!query.contains("&cursor=")) {
|
||||||
query += "&cursor=*";
|
query += "&cursor=*";
|
||||||
}
|
}
|
||||||
|
@ -208,16 +209,16 @@ public class RestIterator implements Iterator<String> {
|
||||||
log.info("requestig URL [{}]", query);
|
log.info("requestig URL [{}]", query);
|
||||||
|
|
||||||
URL qUrl = new URL(query);
|
URL qUrl = new URL(query);
|
||||||
log.debug("authMethod :" + authMethod);
|
log.debug("authMethod: {}", authMethod);
|
||||||
if ("bearer".equalsIgnoreCase(this.authMethod)) {
|
if ("bearer".equalsIgnoreCase(this.authMethod)) {
|
||||||
log.trace("authMethod before inputStream: " + resultXml);
|
log.trace("authMethod before inputStream: {}", resultXml);
|
||||||
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
|
||||||
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
|
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
|
||||||
conn.setRequestMethod("GET");
|
conn.setRequestMethod("GET");
|
||||||
theHttpInputStream = conn.getInputStream();
|
theHttpInputStream = conn.getInputStream();
|
||||||
} else if (BASIC.equalsIgnoreCase(this.authMethod)) {
|
} else if (BASIC.equalsIgnoreCase(this.authMethod)) {
|
||||||
log.trace("authMethod before inputStream: " + resultXml);
|
log.trace("authMethod before inputStream: {}", resultXml);
|
||||||
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
|
||||||
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
|
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
|
||||||
conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
|
conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
|
||||||
|
@ -237,13 +238,13 @@ public class RestIterator implements Iterator<String> {
|
||||||
if (!(emptyXml).equalsIgnoreCase(resultXml)) {
|
if (!(emptyXml).equalsIgnoreCase(resultXml)) {
|
||||||
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
|
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
|
||||||
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
|
||||||
log.debug("nodeList.length: " + nodeList.getLength());
|
log.debug("nodeList.length: {}", nodeList.getLength());
|
||||||
for (int i = 0; i < nodeList.getLength(); i++) {
|
for (int i = 0; i < nodeList.getLength(); i++) {
|
||||||
StringWriter sw = new StringWriter();
|
StringWriter sw = new StringWriter();
|
||||||
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
|
||||||
String toEnqueue = sw.toString();
|
String toEnqueue = sw.toString();
|
||||||
if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
|
if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
|
||||||
log.warn("The following record resulted in empty item for the feeding queue: " + resultXml);
|
log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
|
||||||
} else {
|
} else {
|
||||||
recordQueue.add(sw.toString());
|
recordQueue.add(sw.toString());
|
||||||
}
|
}
|
||||||
|
@ -274,9 +275,9 @@ public class RestIterator implements Iterator<String> {
|
||||||
String[] resumptionKeyValue = arrayUrlArgStr.split("=");
|
String[] resumptionKeyValue = arrayUrlArgStr.split("=");
|
||||||
if (isInteger(resumptionKeyValue[1])) {
|
if (isInteger(resumptionKeyValue[1])) {
|
||||||
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
|
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
|
||||||
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize);
|
log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize);
|
||||||
} else {
|
} else {
|
||||||
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]);
|
log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@ import static eu.dnetlib.dhp.utils.DHPUtils.*;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
|
@ -23,8 +24,8 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
|
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.message.MessageSender;
|
import eu.dnetlib.dhp.message.MessageSender;
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
|
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
|
||||||
|
@ -67,10 +68,10 @@ public class TransformSparkJobNode {
|
||||||
log.info("outputBasePath: {}", outputBasePath);
|
log.info("outputBasePath: {}", outputBasePath);
|
||||||
|
|
||||||
final String isLookupUrl = parser.get("isLookupUrl");
|
final String isLookupUrl = parser.get("isLookupUrl");
|
||||||
log.info(String.format("isLookupUrl: %s", isLookupUrl));
|
log.info("isLookupUrl: {}", isLookupUrl);
|
||||||
|
|
||||||
final String dateOfTransformation = parser.get("dateOfTransformation");
|
final String dateOfTransformation = parser.get("dateOfTransformation");
|
||||||
log.info(String.format("dateOfTransformation: %s", dateOfTransformation));
|
log.info("dateOfTransformation: {}", dateOfTransformation);
|
||||||
|
|
||||||
final Integer rpt = Optional
|
final Integer rpt = Optional
|
||||||
.ofNullable(parser.get("recordsPerTask"))
|
.ofNullable(parser.get("recordsPerTask"))
|
||||||
|
@ -126,12 +127,13 @@ public class TransformSparkJobNode {
|
||||||
JavaRDD<MetadataRecord> mdstore = inputMDStore
|
JavaRDD<MetadataRecord> mdstore = inputMDStore
|
||||||
.javaRDD()
|
.javaRDD()
|
||||||
.repartition(getRepartitionNumber(totalInput, rpt))
|
.repartition(getRepartitionNumber(totalInput, rpt))
|
||||||
.map((Function<MetadataRecord, MetadataRecord>) x::call);
|
.map((Function<MetadataRecord, MetadataRecord>) x::call)
|
||||||
|
.filter((Function<MetadataRecord, Boolean>) Objects::nonNull);
|
||||||
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
|
saveDataset(spark.createDataset(mdstore.rdd(), encoder), outputBasePath + MDSTORE_DATA_PATH);
|
||||||
|
|
||||||
log.info("Transformed item " + ct.getProcessedItems().count());
|
log.info("Transformed item {}", ct.getProcessedItems().count());
|
||||||
log.info("Total item " + ct.getTotalItems().count());
|
log.info("Total item {}", ct.getTotalItems().count());
|
||||||
log.info("Transformation Error item " + ct.getErrorItems().count());
|
log.info("Transformation Error item {}", ct.getErrorItems().count());
|
||||||
|
|
||||||
final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count();
|
final long mdStoreSize = spark.read().load(outputBasePath + MDSTORE_DATA_PATH).count();
|
||||||
writeHdfsFile(
|
writeHdfsFile(
|
||||||
|
|
|
@ -13,12 +13,18 @@ import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
||||||
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
|
||||||
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
import eu.dnetlib.dhp.schema.mdstore.MetadataRecord;
|
||||||
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
|
import eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction;
|
||||||
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
|
||||||
public class TransformationFactory {
|
public class TransformationFactory {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class);
|
private static final Logger log = LoggerFactory.getLogger(TransformationFactory.class);
|
||||||
public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]";
|
public static final String TRULE_XQUERY = "for $x in collection('/db/DRIVER/TransformationRuleDSResources/TransformationRuleDSResourceType') "
|
||||||
|
+
|
||||||
|
"where $x//RESOURCE_IDENTIFIER/@value = \"%s\" return $x//CODE/*[local-name() =\"stylesheet\"]";
|
||||||
|
|
||||||
|
private TransformationFactory() {
|
||||||
|
}
|
||||||
|
|
||||||
public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin(
|
public static MapFunction<MetadataRecord, MetadataRecord> getTransformationPlugin(
|
||||||
final Map<String, String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService)
|
final Map<String, String> jobArgument, final AggregationCounter counters, final ISLookUpService isLookupService)
|
||||||
|
@ -27,7 +33,7 @@ public class TransformationFactory {
|
||||||
try {
|
try {
|
||||||
final String transformationPlugin = jobArgument.get("transformationPlugin");
|
final String transformationPlugin = jobArgument.get("transformationPlugin");
|
||||||
|
|
||||||
log.info("Transformation plugin required " + transformationPlugin);
|
log.info("Transformation plugin required {}", transformationPlugin);
|
||||||
switch (transformationPlugin) {
|
switch (transformationPlugin) {
|
||||||
case "XSLT_TRANSFORM": {
|
case "XSLT_TRANSFORM": {
|
||||||
final String transformationRuleId = jobArgument.get("transformationRuleId");
|
final String transformationRuleId = jobArgument.get("transformationRuleId");
|
||||||
|
@ -38,7 +44,7 @@ public class TransformationFactory {
|
||||||
final String transformationRule = queryTransformationRuleFromIS(
|
final String transformationRule = queryTransformationRuleFromIS(
|
||||||
transformationRuleId, isLookupService);
|
transformationRuleId, isLookupService);
|
||||||
|
|
||||||
final long dateOfTransformation = new Long(jobArgument.get("dateOfTransformation"));
|
final long dateOfTransformation = Long.parseLong(jobArgument.get("dateOfTransformation"));
|
||||||
return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation,
|
return new XSLTTransformationFunction(counters, transformationRule, dateOfTransformation,
|
||||||
vocabularies);
|
vocabularies);
|
||||||
|
|
||||||
|
@ -46,7 +52,6 @@ public class TransformationFactory {
|
||||||
default:
|
default:
|
||||||
throw new DnetTransformationException(
|
throw new DnetTransformationException(
|
||||||
"transformation plugin does not exists for " + transformationPlugin);
|
"transformation plugin does not exists for " + transformationPlugin);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
|
@ -55,9 +60,9 @@ public class TransformationFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String queryTransformationRuleFromIS(final String transformationRuleId,
|
private static String queryTransformationRuleFromIS(final String transformationRuleId,
|
||||||
final ISLookUpService isLookUpService) throws Exception {
|
final ISLookUpService isLookUpService) throws DnetTransformationException, ISLookUpException {
|
||||||
final String query = String.format(TRULE_XQUERY, transformationRuleId);
|
final String query = String.format(TRULE_XQUERY, transformationRuleId);
|
||||||
System.out.println("asking query to IS: " + query);
|
log.info("asking query to IS: {}", query);
|
||||||
List<String> result = isLookUpService.quickSearchProfile(query);
|
List<String> result = isLookUpService.quickSearchProfile(query);
|
||||||
|
|
||||||
if (result == null || result.isEmpty())
|
if (result == null || result.isEmpty())
|
||||||
|
|
|
@ -4,11 +4,6 @@ package eu.dnetlib.dhp.transformation.xslt;
|
||||||
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
import static eu.dnetlib.dhp.transformation.xslt.XSLTTransformationFunction.QNAME_BASE_URI;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.time.LocalDate;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.*;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
import eu.dnetlib.dhp.schema.oaf.utils.GraphCleaningFunctions;
|
||||||
import net.sf.saxon.s9api.*;
|
import net.sf.saxon.s9api.*;
|
||||||
|
|
|
@ -28,22 +28,12 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
private static final Set<String> particles = null;
|
private static final Set<String> particles = null;
|
||||||
|
|
||||||
public PersonCleaner() {
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private String normalize(String s) {
|
private String normalize(String s) {
|
||||||
s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD
|
s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD
|
||||||
s = s.replaceAll("\\(.+\\)", "");
|
s = s.replaceAll("\\(.+\\)", "");
|
||||||
s = s.replaceAll("\\[.+\\]", "");
|
s = s.replaceAll("\\[.+\\]", "");
|
||||||
s = s.replaceAll("\\{.+\\}", "");
|
s = s.replaceAll("\\{.+\\}", "");
|
||||||
s = s.replaceAll("\\s+-\\s+", "-");
|
s = s.replaceAll("\\s+-\\s+", "-");
|
||||||
|
|
||||||
// s = s.replaceAll("[\\W&&[^,-]]", " ");
|
|
||||||
|
|
||||||
// System.out.println("class Person: s: " + s);
|
|
||||||
|
|
||||||
// s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " ");
|
|
||||||
s = s.replaceAll("[\\p{Punct}&&[^-,]]", " ");
|
s = s.replaceAll("[\\p{Punct}&&[^-,]]", " ");
|
||||||
s = s.replace("\\d", " ");
|
s = s.replace("\\d", " ");
|
||||||
s = s.replace("\\n", " ");
|
s = s.replace("\\n", " ");
|
||||||
|
@ -51,8 +41,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
|
||||||
s = s.replaceAll("\\s+", " ");
|
s = s.replaceAll("\\s+", " ");
|
||||||
|
|
||||||
if (s.contains(",")) {
|
if (s.contains(",")) {
|
||||||
// System.out.println("class Person: s: " + s);
|
|
||||||
|
|
||||||
String[] arr = s.split(",");
|
String[] arr = s.split(",");
|
||||||
if (arr.length == 1) {
|
if (arr.length == 1) {
|
||||||
|
|
||||||
|
@ -60,9 +48,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
|
||||||
} else if (arr.length > 1) {
|
} else if (arr.length > 1) {
|
||||||
surname = splitTerms(arr[0]);
|
surname = splitTerms(arr[0]);
|
||||||
firstname = splitTermsFirstName(arr[1]);
|
firstname = splitTermsFirstName(arr[1]);
|
||||||
// System.out.println("class Person: surname: " + surname);
|
|
||||||
// System.out.println("class Person: firstname: " + firstname);
|
|
||||||
|
|
||||||
fullname.addAll(surname);
|
fullname.addAll(surname);
|
||||||
fullname.addAll(firstname);
|
fullname.addAll(firstname);
|
||||||
}
|
}
|
||||||
|
@ -82,7 +67,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
|
||||||
}
|
}
|
||||||
if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini
|
if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini
|
||||||
firstname = fullname.subList(0, lastInitialPosition + 1);
|
firstname = fullname.subList(0, lastInitialPosition + 1);
|
||||||
System.out.println("name: " + firstname);
|
|
||||||
surname = fullname.subList(lastInitialPosition + 1, fullname.size());
|
surname = fullname.subList(lastInitialPosition + 1, fullname.size());
|
||||||
} else if (hasSurnameInUpperCase) { // Case: Michele ARTINI
|
} else if (hasSurnameInUpperCase) { // Case: Michele ARTINI
|
||||||
for (String term : fullname) {
|
for (String term : fullname) {
|
||||||
|
@ -119,16 +103,9 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<String> splitTerms(String s) {
|
private List<String> splitTerms(String s) {
|
||||||
if (particles == null) {
|
|
||||||
// particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt");
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> list = Lists.newArrayList();
|
List<String> list = Lists.newArrayList();
|
||||||
for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
|
for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
|
||||||
// if (!particles.contains(part.toLowerCase())) {
|
|
||||||
list.add(part);
|
list.add(part);
|
||||||
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
@ -152,9 +129,6 @@ public class PersonCleaner implements ExtensionFunction, Serializable {
|
||||||
public String getNormalisedFullname() {
|
public String getNormalisedFullname() {
|
||||||
return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations())
|
return isAccurate() ? Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations())
|
||||||
: Joiner.on(" ").join(fullname);
|
: Joiner.on(" ").join(fullname);
|
||||||
// return isAccurate() ?
|
|
||||||
// Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) :
|
|
||||||
// Joiner.on(" ").join(fullname);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getCapitalSurname() {
|
public List<String> getCapitalSurname() {
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.transformation.xslt;
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
@ -18,11 +17,11 @@ import net.sf.saxon.s9api.*;
|
||||||
|
|
||||||
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable {
|
public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>, Serializable {
|
||||||
|
|
||||||
public final static String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
public static final String QNAME_BASE_URI = "http://eu/dnetlib/transform";
|
||||||
|
|
||||||
private final static String DATASOURCE_ID_PARAM = "varDataSourceId";
|
private static final String DATASOURCE_ID_PARAM = "varDataSourceId";
|
||||||
|
|
||||||
private final static String DATASOURCE_NAME_PARAM = "varOfficialName";
|
private static final String DATASOURCE_NAME_PARAM = "varOfficialName";
|
||||||
|
|
||||||
private final AggregationCounter aggregationCounter;
|
private final AggregationCounter aggregationCounter;
|
||||||
|
|
||||||
|
@ -38,8 +37,7 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
final AggregationCounter aggregationCounter,
|
final AggregationCounter aggregationCounter,
|
||||||
final String transformationRule,
|
final String transformationRule,
|
||||||
long dateOfTransformation,
|
long dateOfTransformation,
|
||||||
final VocabularyGroup vocabularies)
|
final VocabularyGroup vocabularies) {
|
||||||
throws Exception {
|
|
||||||
this.aggregationCounter = aggregationCounter;
|
this.aggregationCounter = aggregationCounter;
|
||||||
this.transformationRule = transformationRule;
|
this.transformationRule = transformationRule;
|
||||||
this.vocabularies = vocabularies;
|
this.vocabularies = vocabularies;
|
||||||
|
@ -83,7 +81,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
return value;
|
return value;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
aggregationCounter.getErrorItems().add(1);
|
aggregationCounter.getErrorItems().add(1);
|
||||||
throw new RuntimeException(e);
|
return null;
|
||||||
|
// throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
|
|
||||||
package eu.dnetlib.dhp.transformation.xslt.utils;
|
package eu.dnetlib.dhp.transformation.xslt.utils;
|
||||||
|
|
||||||
// import org.apache.commons.text.WordUtils;
|
|
||||||
// import org.apache.commons.text.WordUtils;
|
|
||||||
import com.google.common.base.Function;
|
import com.google.common.base.Function;
|
||||||
|
|
||||||
public class Capitalize implements Function<String, String> {
|
public class Capitalize implements Function<String, String> {
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue