Merge branch 'beta' into beta

This commit is contained in:
Antonis Lempesis 2021-10-19 23:54:21 +02:00
commit 241dcf6df1
628 changed files with 35057 additions and 5275 deletions

View File

@ -1,2 +1,2 @@
# dnet-hadoop # dnet-hadoop
Dnet-hadoop is a tool for Dnet-hadoop is the project that defines all the Oozie workflows for the OpenAIRE Graph construction, processing, and provisioning.

View File

@ -8,8 +8,6 @@ import java.util.List;
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.maven.plugin.AbstractMojo; import org.apache.maven.plugin.AbstractMojo;
import org.apache.maven.plugin.MojoExecutionException;
import org.apache.maven.plugin.MojoFailureException;
/** /**
* Generates oozie properties which were not provided from commandline. * Generates oozie properties which were not provided from commandline.
@ -27,7 +25,7 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
}; };
@Override @Override
public void execute() throws MojoExecutionException, MojoFailureException { public void execute() {
if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR) if (System.getProperties().containsKey(PROPERTY_NAME_WF_SOURCE_DIR)
&& !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) { && !System.getProperties().containsKey(PROPERTY_NAME_SANDBOX_NAME)) {
String generatedSandboxName = generateSandboxName( String generatedSandboxName = generateSandboxName(
@ -46,24 +44,24 @@ public class GenerateOoziePropertiesMojo extends AbstractMojo {
/** /**
* Generates sandbox name from workflow source directory. * Generates sandbox name from workflow source directory.
* *
* @param wfSourceDir * @param wfSourceDir workflow source directory
* @return generated sandbox name * @return generated sandbox name
*/ */
private String generateSandboxName(String wfSourceDir) { private String generateSandboxName(String wfSourceDir) {
// utilize all dir names until finding one of the limiters // utilize all dir names until finding one of the limiters
List<String> sandboxNameParts = new ArrayList<String>(); List<String> sandboxNameParts = new ArrayList<>();
String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar); String[] tokens = StringUtils.split(wfSourceDir, File.separatorChar);
ArrayUtils.reverse(tokens); ArrayUtils.reverse(tokens);
if (tokens.length > 0) { if (tokens.length > 0) {
for (String token : tokens) { for (String token : tokens) {
for (String limiter : limiters) { for (String limiter : limiters) {
if (limiter.equals(token)) { if (limiter.equals(token)) {
return sandboxNameParts.size() > 0 return !sandboxNameParts.isEmpty()
? StringUtils.join(sandboxNameParts.toArray()) ? StringUtils.join(sandboxNameParts.toArray())
: null; : null;
} }
} }
if (sandboxNameParts.size() > 0) { if (!sandboxNameParts.isEmpty()) {
sandboxNameParts.add(0, File.separator); sandboxNameParts.add(0, File.separator);
} }
sandboxNameParts.add(0, token); sandboxNameParts.add(0, token);

View File

@ -16,6 +16,7 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -289,7 +290,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
*/ */
protected List<String> getEscapeChars(String escapeChars) { protected List<String> getEscapeChars(String escapeChars) {
List<String> tokens = getListFromCSV(escapeChars); List<String> tokens = getListFromCSV(escapeChars);
List<String> realTokens = new ArrayList<String>(); List<String> realTokens = new ArrayList<>();
for (String token : tokens) { for (String token : tokens) {
String realToken = getRealToken(token); String realToken = getRealToken(token);
realTokens.add(realToken); realTokens.add(realToken);
@ -324,7 +325,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
* @return content * @return content
*/ */
protected String getContent(String comment, Properties properties, List<String> escapeTokens) { protected String getContent(String comment, Properties properties, List<String> escapeTokens) {
List<String> names = new ArrayList<String>(properties.stringPropertyNames()); List<String> names = new ArrayList<>(properties.stringPropertyNames());
Collections.sort(names); Collections.sort(names);
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
if (!StringUtils.isBlank(comment)) { if (!StringUtils.isBlank(comment)) {
@ -352,7 +353,7 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
throws MojoExecutionException { throws MojoExecutionException {
try { try {
String content = getContent(comment, properties, escapeTokens); String content = getContent(comment, properties, escapeTokens);
FileUtils.writeStringToFile(file, content, ENCODING_UTF8); FileUtils.writeStringToFile(file, content, StandardCharsets.UTF_8);
} catch (IOException e) { } catch (IOException e) {
throw new MojoExecutionException("Error creating properties file", e); throw new MojoExecutionException("Error creating properties file", e);
} }
@ -399,9 +400,9 @@ public class WritePredefinedProjectProperties extends AbstractMojo {
*/ */
protected static final List<String> getListFromCSV(String csv) { protected static final List<String> getListFromCSV(String csv) {
if (StringUtils.isBlank(csv)) { if (StringUtils.isBlank(csv)) {
return new ArrayList<String>(); return new ArrayList<>();
} }
List<String> list = new ArrayList<String>(); List<String> list = new ArrayList<>();
String[] tokens = StringUtils.split(csv, ","); String[] tokens = StringUtils.split(csv, ",");
for (String token : tokens) { for (String token : tokens) {
list.add(token.trim()); list.add(token.trim());

View File

@ -9,18 +9,18 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
/** @author mhorst, claudio.atzori */ /** @author mhorst, claudio.atzori */
public class GenerateOoziePropertiesMojoTest { class GenerateOoziePropertiesMojoTest {
private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo(); private final GenerateOoziePropertiesMojo mojo = new GenerateOoziePropertiesMojo();
@BeforeEach @BeforeEach
public void clearSystemProperties() { void clearSystemProperties() {
System.clearProperty(PROPERTY_NAME_SANDBOX_NAME); System.clearProperty(PROPERTY_NAME_SANDBOX_NAME);
System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR); System.clearProperty(PROPERTY_NAME_WF_SOURCE_DIR);
} }
@Test @Test
public void testExecuteEmpty() throws Exception { void testExecuteEmpty() throws Exception {
// execute // execute
mojo.execute(); mojo.execute();
@ -29,7 +29,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteSandboxNameAlreadySet() throws Exception { void testExecuteSandboxNameAlreadySet() throws Exception {
// given // given
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
String sandboxName = "originalSandboxName"; String sandboxName = "originalSandboxName";
@ -44,7 +44,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteEmptyWorkflowSourceDir() throws Exception { void testExecuteEmptyWorkflowSourceDir() throws Exception {
// given // given
String workflowSourceDir = ""; String workflowSourceDir = "";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -57,7 +57,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteNullSandboxNameGenerated() throws Exception { void testExecuteNullSandboxNameGenerated() throws Exception {
// given // given
String workflowSourceDir = "eu/dnetlib/dhp/"; String workflowSourceDir = "eu/dnetlib/dhp/";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -70,7 +70,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecute() throws Exception { void testExecute() throws Exception {
// given // given
String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers"; String workflowSourceDir = "eu/dnetlib/dhp/wf/transformers";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);
@ -83,7 +83,7 @@ public class GenerateOoziePropertiesMojoTest {
} }
@Test @Test
public void testExecuteWithoutRoot() throws Exception { void testExecuteWithoutRoot() throws Exception {
// given // given
String workflowSourceDir = "wf/transformers"; String workflowSourceDir = "wf/transformers";
System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir); System.setProperty(PROPERTY_NAME_WF_SOURCE_DIR, workflowSourceDir);

View File

@ -20,7 +20,7 @@ import org.mockito.junit.jupiter.MockitoExtension;
/** @author mhorst, claudio.atzori */ /** @author mhorst, claudio.atzori */
@ExtendWith(MockitoExtension.class) @ExtendWith(MockitoExtension.class)
public class WritePredefinedProjectPropertiesTest { class WritePredefinedProjectPropertiesTest {
@Mock @Mock
private MavenProject mavenProject; private MavenProject mavenProject;
@ -39,7 +39,7 @@ public class WritePredefinedProjectPropertiesTest {
// ----------------------------------- TESTS --------------------------------------------- // ----------------------------------- TESTS ---------------------------------------------
@Test @Test
public void testExecuteEmpty() throws Exception { void testExecuteEmpty() throws Exception {
// execute // execute
mojo.execute(); mojo.execute();
@ -50,7 +50,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectProperties() throws Exception { void testExecuteWithProjectProperties() throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -70,7 +70,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test() @Test()
public void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) { void testExecuteWithProjectPropertiesAndInvalidOutputFile(@TempDir File testFolder) {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -84,7 +84,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception { void testExecuteWithProjectPropertiesExclusion(@TempDir File testFolder) throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -108,7 +108,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception { void testExecuteWithProjectPropertiesInclusion(@TempDir File testFolder) throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -132,7 +132,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception { void testExecuteIncludingPropertyKeysFromFile(@TempDir File testFolder) throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -164,7 +164,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder) void testExecuteIncludingPropertyKeysFromClasspathResource(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
@ -194,7 +194,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromBlankLocation() { void testExecuteIncludingPropertyKeysFromBlankLocation() {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
String value = "projectPropertyValue"; String value = "projectPropertyValue";
@ -214,7 +214,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder) void testExecuteIncludingPropertyKeysFromXmlFile(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
@ -247,7 +247,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder) void testExecuteIncludingPropertyKeysFromInvalidXmlFile(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "projectPropertyKey"; String key = "projectPropertyKey";
@ -273,7 +273,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception { void testExecuteWithQuietModeOn(@TempDir File testFolder) throws Exception {
// given // given
mojo.setQuiet(true); mojo.setQuiet(true);
mojo.setIncludePropertyKeysFromFiles(new String[] { mojo.setIncludePropertyKeysFromFiles(new String[] {
@ -290,7 +290,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteIncludingPropertyKeysFromInvalidFile() { void testExecuteIncludingPropertyKeysFromInvalidFile() {
// given // given
mojo.setIncludePropertyKeysFromFiles(new String[] { mojo.setIncludePropertyKeysFromFiles(new String[] {
"invalid location" "invalid location"
@ -301,7 +301,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception { void testExecuteWithEnvironmentProperties(@TempDir File testFolder) throws Exception {
// given // given
mojo.setIncludeEnvironmentVariables(true); mojo.setIncludeEnvironmentVariables(true);
@ -318,7 +318,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception { void testExecuteWithSystemProperties(@TempDir File testFolder) throws Exception {
// given // given
String key = "systemPropertyKey"; String key = "systemPropertyKey";
String value = "systemPropertyValue"; String value = "systemPropertyValue";
@ -337,7 +337,7 @@ public class WritePredefinedProjectPropertiesTest {
} }
@Test @Test
public void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder) void testExecuteWithSystemPropertiesAndEscapeChars(@TempDir File testFolder)
throws Exception { throws Exception {
// given // given
String key = "systemPropertyKey "; String key = "systemPropertyKey ";

View File

@ -25,6 +25,11 @@
<groupId>com.github.sisyphsu</groupId> <groupId>com.github.sisyphsu</groupId>
<artifactId>dateparser</artifactId> <artifactId>dateparser</artifactId>
</dependency> </dependency>
<dependency>
<groupId>me.xuender</groupId>
<artifactId>unidecode</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.apache.spark</groupId> <groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId> <artifactId>spark-core_2.11</artifactId>
@ -112,6 +117,11 @@
<groupId>eu.dnetlib.dhp</groupId> <groupId>eu.dnetlib.dhp</groupId>
<artifactId>dhp-schemas</artifactId> <artifactId>dhp-schemas</artifactId>
</dependency> </dependency>
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@ -1,14 +0,0 @@
package eu.dnetlib.dhp.application;
import java.io.*;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import com.google.common.collect.Maps;
public class ApplicationUtils {
}

View File

@ -56,13 +56,13 @@ public class ArgumentApplicationParser implements Serializable {
final StringWriter stringWriter = new StringWriter(); final StringWriter stringWriter = new StringWriter();
IOUtils.copy(gis, stringWriter); IOUtils.copy(gis, stringWriter);
return stringWriter.toString(); return stringWriter.toString();
} catch (Throwable e) { } catch (IOException e) {
log.error("Wrong value to decompress:" + abstractCompressed); log.error("Wrong value to decompress: {}", abstractCompressed);
throw new RuntimeException(e); throw new IllegalArgumentException(e);
} }
} }
public static String compressArgument(final String value) throws Exception { public static String compressArgument(final String value) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out); GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(value.getBytes()); gzip.write(value.getBytes());

View File

@ -9,9 +9,6 @@ public class OptionsParameter {
private boolean paramRequired; private boolean paramRequired;
private boolean compressed; private boolean compressed;
public OptionsParameter() {
}
public String getParamName() { public String getParamName() {
return paramName; return paramName;
} }

View File

@ -34,7 +34,7 @@ public class ApiDescriptor {
return params; return params;
} }
public void setParams(final HashMap<String, String> params) { public void setParams(final Map<String, String> params) {
this.params = params; this.params = params;
} }

View File

@ -12,6 +12,9 @@ public class Constants {
public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/"; public static String COAR_ACCESS_RIGHT_SCHEMA = "http://vocabularies.coar-repositories.org/documentation/access_rights/";
private Constants() {
}
static { static {
accessRightsCoarMap.put("OPEN", "c_abf2"); accessRightsCoarMap.put("OPEN", "c_abf2");
accessRightsCoarMap.put("RESTRICTED", "c_16ec"); accessRightsCoarMap.put("RESTRICTED", "c_16ec");
@ -49,4 +52,10 @@ public class Constants {
public static final String CONTENT_INVALIDRECORDS = "InvalidRecords"; public static final String CONTENT_INVALIDRECORDS = "InvalidRecords";
public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems"; public static final String CONTENT_TRANSFORMEDRECORDS = "transformedItems";
// HTTP rate-limit headers described by an IETF draft and used by repositories such as Zenodo;
// they are not included in the Apache HTTP Java packages.
// See https://ietf-wg-httpapi.github.io/ratelimit-headers/draft-ietf-httpapi-ratelimit-headers.html
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT = "X-RateLimit-Limit";
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING = "X-RateLimit-Remaining";
public static final String HTTPHEADER_IETF_DRAFT_RATELIMIT_RESET = "X-RateLimit-Reset";
} }

View File

@ -84,7 +84,7 @@ public class GraphResultMapper implements Serializable {
.setDocumentationUrl( .setDocumentationUrl(
value value
.stream() .stream()
.map(v -> v.getValue()) .map(Field::getValue)
.collect(Collectors.toList()))); .collect(Collectors.toList())));
Optional Optional
@ -100,20 +100,20 @@ public class GraphResultMapper implements Serializable {
.setContactgroup( .setContactgroup(
Optional Optional
.ofNullable(ir.getContactgroup()) .ofNullable(ir.getContactgroup())
.map(value -> value.stream().map(cg -> cg.getValue()).collect(Collectors.toList())) .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
.orElse(null)); .orElse(null));
out out
.setContactperson( .setContactperson(
Optional Optional
.ofNullable(ir.getContactperson()) .ofNullable(ir.getContactperson())
.map(value -> value.stream().map(cp -> cp.getValue()).collect(Collectors.toList())) .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
.orElse(null)); .orElse(null));
out out
.setTool( .setTool(
Optional Optional
.ofNullable(ir.getTool()) .ofNullable(ir.getTool())
.map(value -> value.stream().map(t -> t.getValue()).collect(Collectors.toList())) .map(value -> value.stream().map(Field::getValue).collect(Collectors.toList()))
.orElse(null)); .orElse(null));
out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname()); out.setType(ModelConstants.ORP_DEFAULT_RESULTTYPE.getClassname());
@ -123,7 +123,8 @@ public class GraphResultMapper implements Serializable {
Optional Optional
.ofNullable(input.getAuthor()) .ofNullable(input.getAuthor())
.ifPresent(ats -> out.setAuthor(ats.stream().map(at -> getAuthor(at)).collect(Collectors.toList()))); .ifPresent(
ats -> out.setAuthor(ats.stream().map(GraphResultMapper::getAuthor).collect(Collectors.toList())));
// I do not map Access Right UNKNOWN or OTHER // I do not map Access Right UNKNOWN or OTHER
@ -210,7 +211,7 @@ public class GraphResultMapper implements Serializable {
if (oInst.isPresent()) { if (oInst.isPresent()) {
out out
.setInstance( .setInstance(
oInst.get().stream().map(i -> getInstance(i)).collect(Collectors.toList())); oInst.get().stream().map(GraphResultMapper::getInstance).collect(Collectors.toList()));
} }
@ -230,7 +231,7 @@ public class GraphResultMapper implements Serializable {
.stream() .stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title")) .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("main title"))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (iTitle.size() > 0) { if (!iTitle.isEmpty()) {
out.setMaintitle(iTitle.get(0).getValue()); out.setMaintitle(iTitle.get(0).getValue());
} }
@ -239,7 +240,7 @@ public class GraphResultMapper implements Serializable {
.stream() .stream()
.filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle")) .filter(t -> t.getQualifier().getClassid().equalsIgnoreCase("subtitle"))
.collect(Collectors.toList()); .collect(Collectors.toList());
if (iTitle.size() > 0) { if (!iTitle.isEmpty()) {
out.setSubtitle(iTitle.get(0).getValue()); out.setSubtitle(iTitle.get(0).getValue());
} }

View File

@ -14,38 +14,33 @@ public class MakeTarArchive implements Serializable {
private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException { private static TarArchiveOutputStream getTar(FileSystem fileSystem, String outputPath) throws IOException {
Path hdfsWritePath = new Path(outputPath); Path hdfsWritePath = new Path(outputPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true); fileSystem.delete(hdfsWritePath, true);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); return new TarArchiveOutputStream(fileSystem.create(hdfsWritePath).getWrappedStream());
return new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream());
} }
private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name) private static void write(FileSystem fileSystem, String inputPath, String outputPath, String dir_name)
throws IOException { throws IOException {
Path hdfsWritePath = new Path(outputPath); Path hdfsWritePath = new Path(outputPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, true); fileSystem.delete(hdfsWritePath, true);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); try (TarArchiveOutputStream ar = new TarArchiveOutputStream(
fileSystem.create(hdfsWritePath).getWrappedStream())) {
TarArchiveOutputStream ar = new TarArchiveOutputStream(fsDataOutputStream.getWrappedStream()); RemoteIterator<LocatedFileStatus> iterator = fileSystem
RemoteIterator<LocatedFileStatus> fileStatusListIterator = fileSystem
.listFiles( .listFiles(
new Path(inputPath), true); new Path(inputPath), true);
while (fileStatusListIterator.hasNext()) { while (iterator.hasNext()) {
writeCurrentFile(fileSystem, dir_name, fileStatusListIterator, ar, 0); writeCurrentFile(fileSystem, dir_name, iterator, ar, 0);
} }
ar.close(); }
} }
public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name, public static void tarMaxSize(FileSystem fileSystem, String inputPath, String outputPath, String dir_name,
@ -90,6 +85,13 @@ public class MakeTarArchive implements Serializable {
String p_string = p.toString(); String p_string = p.toString();
if (!p_string.endsWith("_SUCCESS")) { if (!p_string.endsWith("_SUCCESS")) {
String name = p_string.substring(p_string.lastIndexOf("/") + 1); String name = p_string.substring(p_string.lastIndexOf("/") + 1);
if (name.startsWith("part-") & name.length() > 10) {
String tmp = name.substring(0, 10);
if (name.contains(".")) {
tmp += name.substring(name.indexOf("."));
}
name = tmp;
}
TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name); TarArchiveEntry entry = new TarArchiveEntry(dir_name + "/" + name);
entry.setSize(fileStatus.getLen()); entry.setSize(fileStatus.getLen());
current_size += fileStatus.getLen(); current_size += fileStatus.getLen();

View File

@ -10,8 +10,6 @@ import java.util.Optional;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bson.Document; import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -21,6 +19,7 @@ import com.mongodb.BasicDBObject;
import com.mongodb.MongoClient; import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI; import com.mongodb.MongoClientURI;
import com.mongodb.QueryBuilder; import com.mongodb.QueryBuilder;
import com.mongodb.client.FindIterable;
import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase; import com.mongodb.client.MongoDatabase;
@ -46,7 +45,7 @@ public class MdstoreClient implements Closeable {
final String currentId = Optional final String currentId = Optional
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
.map(r -> r.first()) .map(FindIterable::first)
.map(d -> d.getString("currentId")) .map(d -> d.getString("currentId"))
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
@ -84,7 +83,7 @@ public class MdstoreClient implements Closeable {
if (!Iterables.contains(client.listDatabaseNames(), dbName)) { if (!Iterables.contains(client.listDatabaseNames(), dbName)) {
final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress()); final String err = String.format("Database '%s' not found in %s", dbName, client.getAddress());
log.warn(err); log.warn(err);
throw new RuntimeException(err); throw new IllegalArgumentException(err);
} }
return client.getDatabase(dbName); return client.getDatabase(dbName);
} }
@ -97,7 +96,7 @@ public class MdstoreClient implements Closeable {
String.format("Missing collection '%s' in database '%s'", collName, db.getName())); String.format("Missing collection '%s' in database '%s'", collName, db.getName()));
log.warn(err); log.warn(err);
if (abortIfMissing) { if (abortIfMissing) {
throw new RuntimeException(err); throw new IllegalArgumentException(err);
} else { } else {
return null; return null;
} }

View File

@ -24,7 +24,6 @@ import com.google.common.hash.Hashing;
*/ */
public class PacePerson { public class PacePerson {
private static final String UTF8 = "UTF-8";
private List<String> name = Lists.newArrayList(); private List<String> name = Lists.newArrayList();
private List<String> surname = Lists.newArrayList(); private List<String> surname = Lists.newArrayList();
private List<String> fullname = Lists.newArrayList(); private List<String> fullname = Lists.newArrayList();

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.aggregation.common; package eu.dnetlib.dhp.common.aggregation;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
@ -11,8 +11,6 @@ import java.util.Objects;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
@ -20,12 +18,12 @@ public class AggregatorReport extends LinkedHashMap<String, String> implements C
private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class); private static final Logger log = LoggerFactory.getLogger(AggregatorReport.class);
private MessageSender messageSender; private transient MessageSender messageSender;
public AggregatorReport() { public AggregatorReport() {
} }
public AggregatorReport(MessageSender messageSender) throws IOException { public AggregatorReport(MessageSender messageSender) {
this.messageSender = messageSender; this.messageSender = messageSender;
} }

View File

@ -5,6 +5,9 @@ import java.io.*;
import java.io.IOException; import java.io.IOException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.http.HttpHeaders;
import org.apache.http.entity.ContentType;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel; import eu.dnetlib.dhp.common.api.zenodo.ZenodoModel;
@ -43,7 +46,7 @@ public class ZenodoAPIClient implements Serializable {
this.deposition_id = deposition_id; this.deposition_id = deposition_id;
} }
public ZenodoAPIClient(String urlString, String access_token) throws IOException { public ZenodoAPIClient(String urlString, String access_token) {
this.urlString = urlString; this.urlString = urlString;
this.access_token = access_token; this.access_token = access_token;
@ -63,8 +66,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString) .url(urlString)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body) .post(body)
.build(); .build();
@ -103,8 +106,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(bucket + "/" + file_name) .url(bucket + "/" + file_name)
.addHeader("Content-Type", "application/zip") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, "application/zip") // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len)) .put(InputStreamRequestBody.create(MEDIA_TYPE_ZIP, is, len))
.build(); .build();
@ -130,8 +133,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString + "/" + deposition_id) .url(urlString + "/" + deposition_id)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.put(body) .put(body)
.build(); .build();
@ -197,7 +200,7 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString + "/" + deposition_id + "/actions/newversion") .url(urlString + "/" + deposition_id + "/actions/newversion")
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.post(body) .post(body)
.build(); .build();
@ -270,8 +273,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(urlString) .url(urlString)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get() .get()
.build(); .build();
@ -293,8 +296,8 @@ public class ZenodoAPIClient implements Serializable {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(url) .url(url)
.addHeader("Content-Type", "application/json") // add request headers .addHeader(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.toString()) // add request headers
.addHeader("Authorization", "Bearer " + access_token) .addHeader(HttpHeaders.AUTHORIZATION, "Bearer " + access_token)
.get() .get()
.build(); .build();

View File

@ -32,13 +32,13 @@ public class Creator {
public static Creator newInstance(String name, String affiliation, String orcid) { public static Creator newInstance(String name, String affiliation, String orcid) {
Creator c = new Creator(); Creator c = new Creator();
if (!(name == null)) { if (name != null) {
c.name = name; c.name = name;
} }
if (!(affiliation == null)) { if (affiliation != null) {
c.affiliation = affiliation; c.affiliation = affiliation;
} }
if (!(orcid == null)) { if (orcid != null) {
c.orcid = orcid; c.orcid = orcid;
} }

View File

@ -3,17 +3,12 @@ package eu.dnetlib.dhp.common.api.zenodo;
import java.io.Serializable; import java.io.Serializable;
import net.minidev.json.annotate.JsonIgnore;
public class File implements Serializable { public class File implements Serializable {
private String checksum; private String checksum;
private String filename; private String filename;
private long filesize; private long filesize;
private String id; private String id;
@JsonIgnore
// private Links links;
public String getChecksum() { public String getChecksum() {
return checksum; return checksum;
} }
@ -46,13 +41,4 @@ public class File implements Serializable {
this.id = id; this.id = id;
} }
// @JsonIgnore
// public Links getLinks() {
// return links;
// }
//
// @JsonIgnore
// public void setLinks(Links links) {
// this.links = links;
// }
} }

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.common.collection;
public class CollectorException extends Exception { public class CollectorException extends Exception {

View File

@ -0,0 +1,56 @@
package eu.dnetlib.dhp.common.collection;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.bean.CsvToBeanBuilder;
/**
 * Utility that reads CSV content, binds every row to a bean of a given model class and
 * writes the beans as JSON, one document per line, to a file on a Hadoop {@link FileSystem}.
 */
public class GetCSV {

	/** Field separator used when the caller does not specify one. */
	public static final char DEFAULT_DELIMITER = ',';

	// Shared mapper: avoids rebuilding an ObjectMapper on every invocation.
	private static final ObjectMapper MAPPER = new ObjectMapper();

	private GetCSV() {
	}

	/**
	 * Converts the CSV content to JSON lines using {@link #DEFAULT_DELIMITER} as separator.
	 *
	 * @param fileSystem file system on which the output file is written
	 * @param reader source of the CSV content
	 * @param hdfsPath path of the output file
	 * @param modelClass fully qualified name of the bean class each CSV row is bound to
	 * @throws IOException if the output file cannot be replaced or written
	 * @throws ClassNotFoundException if {@code modelClass} cannot be loaded
	 */
	public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
		String modelClass) throws IOException, ClassNotFoundException {
		getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
	}

	/**
	 * Converts the CSV content to JSON lines.
	 *
	 * The model class is resolved and the CSV is parsed <em>before</em> the output path is
	 * touched, so a wrong class name or unparsable input does not delete a previously
	 * written file nor leave an empty one behind. This method does not explicitly close
	 * {@code reader}.
	 *
	 * @param fileSystem file system on which the output file is written
	 * @param reader source of the CSV content
	 * @param hdfsPath path of the output file; an existing entry at this path is deleted first
	 * @param modelClass fully qualified name of the bean class each CSV row is bound to
	 * @param delimiter field separator of the CSV content
	 * @throws IOException if the output file cannot be replaced or written
	 * @throws ClassNotFoundException if {@code modelClass} cannot be loaded
	 */
	public static void getCsv(FileSystem fileSystem, Reader reader, String hdfsPath,
		String modelClass, char delimiter) throws IOException, ClassNotFoundException {

		// Fail fast on the input side: nothing on the file system has been modified yet.
		final Class<?> beanType = Class.forName(modelClass);
		final List<Object> beans = new CsvToBeanBuilder<Object>(reader)
			.withType(beanType)
			.withSeparator(delimiter)
			.build()
			.parse();

		final Path hdfsWritePath = new Path(hdfsPath);
		if (fileSystem.exists(hdfsWritePath)) {
			// non-recursive delete, as before
			fileSystem.delete(hdfsWritePath, false);
		}

		// Closing the writer flushes it and closes the underlying output stream.
		try (BufferedWriter writer = new BufferedWriter(
			new OutputStreamWriter(fileSystem.create(hdfsWritePath), StandardCharsets.UTF_8))) {
			for (Object bean : beans) {
				writer.write(MAPPER.writeValueAsString(bean));
				writer.newLine();
			}
		}
	}

}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.common.collection;
/** /**
* Bundles the http connection parameters driving the client behaviour. * Bundles the http connection parameters driving the client behaviour.

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.collection; package eu.dnetlib.dhp.common.collection;
import static eu.dnetlib.dhp.utils.DHPUtils.*; import static eu.dnetlib.dhp.utils.DHPUtils.*;
@ -15,12 +15,13 @@ import org.apache.http.HttpHeaders;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.common.Constants;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
/** /**
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
* *
* @author jochen, michele, andrea, alessia, claudio * @author jochen, michele, andrea, alessia, claudio, andreas
*/ */
public class HttpConnector2 { public class HttpConnector2 {
@ -32,7 +33,7 @@ public class HttpConnector2 {
private String responseType = null; private String responseType = null;
private final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)"; private static final String userAgent = "Mozilla/5.0 (compatible; OAI; +http://www.openaire.eu)";
public HttpConnector2() { public HttpConnector2() {
this(new HttpClientParams()); this(new HttpClientParams());
@ -112,6 +113,17 @@ public class HttpConnector2 {
} }
int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); int retryAfter = obtainRetryAfter(urlConn.getHeaderFields());
String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT);
String rateRemaining = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_REMAINING);
if ((rateLimit != null) && (rateRemaining != null) && (Integer.parseInt(rateRemaining) < 2)) {
if (retryAfter > 0) {
backoffAndSleep(retryAfter);
} else {
backoffAndSleep(1000);
}
}
if (is2xx(urlConn.getResponseCode())) { if (is2xx(urlConn.getResponseCode())) {
input = urlConn.getInputStream(); input = urlConn.getInputStream();
responseType = urlConn.getContentType(); responseType = urlConn.getContentType();
@ -120,7 +132,7 @@ public class HttpConnector2 {
if (is3xx(urlConn.getResponseCode())) { if (is3xx(urlConn.getResponseCode())) {
// REDIRECTS // REDIRECTS
final String newUrl = obtainNewLocation(urlConn.getHeaderFields()); final String newUrl = obtainNewLocation(urlConn.getHeaderFields());
log.info(String.format("The requested url has been moved to %s", newUrl)); log.info("The requested url has been moved to {}", newUrl);
report report
.put( .put(
REPORT_PREFIX + urlConn.getResponseCode(), REPORT_PREFIX + urlConn.getResponseCode(),
@ -140,14 +152,14 @@ public class HttpConnector2 {
if (retryAfter > 0) { if (retryAfter > 0) {
log log
.warn( .warn(
requestUrl + " - waiting and repeating request after suggested retry-after " "{} - waiting and repeating request after suggested retry-after {} sec.",
+ retryAfter + " sec."); requestUrl, retryAfter);
backoffAndSleep(retryAfter * 1000); backoffAndSleep(retryAfter * 1000);
} else { } else {
log log
.warn( .warn(
requestUrl + " - waiting and repeating request after default delay of " "{} - waiting and repeating request after default delay of {} sec.",
+ getClientParams().getRetryDelay() + " sec."); requestUrl, getClientParams().getRetryDelay());
backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000);
} }
report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl);
@ -181,12 +193,12 @@ public class HttpConnector2 {
} }
private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { private void logHeaderFields(final HttpURLConnection urlConn) throws IOException {
log.debug("StatusCode: " + urlConn.getResponseMessage()); log.debug("StatusCode: {}", urlConn.getResponseMessage());
for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) { for (Map.Entry<String, List<String>> e : urlConn.getHeaderFields().entrySet()) {
if (e.getKey() != null) { if (e.getKey() != null) {
for (String v : e.getValue()) { for (String v : e.getValue()) {
log.debug(" key: " + e.getKey() + " - value: " + v); log.debug(" key: {} - value: {}", e.getKey(), v);
} }
} }
} }
@ -204,7 +216,7 @@ public class HttpConnector2 {
private int obtainRetryAfter(final Map<String, List<String>> headerMap) { private int obtainRetryAfter(final Map<String, List<String>> headerMap) {
for (String key : headerMap.keySet()) { for (String key : headerMap.keySet()) {
if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (headerMap.get(key).size() > 0) if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty())
&& NumberUtils.isCreatable(headerMap.get(key).get(0))) { && NumberUtils.isCreatable(headerMap.get(key).get(0))) {
return Integer.parseInt(headerMap.get(key).get(0)) + 10; return Integer.parseInt(headerMap.get(key).get(0)) + 10;
} }

View File

@ -1,11 +1,11 @@
package eu.dnetlib.dhp.common.rest; package eu.dnetlib.dhp.common.rest;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost; import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest; import org.apache.http.client.methods.HttpUriRequest;
@ -23,17 +23,20 @@ public class DNetRestClient {
private static final ObjectMapper mapper = new ObjectMapper(); private static final ObjectMapper mapper = new ObjectMapper();
private DNetRestClient() {
}
public static <T> T doGET(final String url, Class<T> clazz) throws Exception { public static <T> T doGET(final String url, Class<T> clazz) throws Exception {
final HttpGet httpGet = new HttpGet(url); final HttpGet httpGet = new HttpGet(url);
return doHTTPRequest(httpGet, clazz); return doHTTPRequest(httpGet, clazz);
} }
public static String doGET(final String url) throws Exception { public static String doGET(final String url) throws IOException {
final HttpGet httpGet = new HttpGet(url); final HttpGet httpGet = new HttpGet(url);
return doHTTPRequest(httpGet); return doHTTPRequest(httpGet);
} }
public static <V> String doPOST(final String url, V objParam) throws Exception { public static <V> String doPOST(final String url, V objParam) throws IOException {
final HttpPost httpPost = new HttpPost(url); final HttpPost httpPost = new HttpPost(url);
if (objParam != null) { if (objParam != null) {
@ -45,12 +48,12 @@ public class DNetRestClient {
return doHTTPRequest(httpPost); return doHTTPRequest(httpPost);
} }
public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws Exception { public static <T, V> T doPOST(final String url, V objParam, Class<T> clazz) throws IOException {
return mapper.readValue(doPOST(url, objParam), clazz); return mapper.readValue(doPOST(url, objParam), clazz);
} }
private static String doHTTPRequest(final HttpUriRequest r) throws Exception { private static String doHTTPRequest(final HttpUriRequest r) throws IOException {
CloseableHttpClient client = HttpClients.createDefault(); try (CloseableHttpClient client = HttpClients.createDefault()) {
log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString()); log.info("performing HTTP request, method {} on URI {}", r.getMethod(), r.getURI().toString());
log log
@ -62,8 +65,8 @@ public class DNetRestClient {
.map(h -> h.getName() + ":" + h.getValue()) .map(h -> h.getName() + ":" + h.getValue())
.collect(Collectors.joining(","))); .collect(Collectors.joining(",")));
CloseableHttpResponse response = client.execute(r); return IOUtils.toString(client.execute(r).getEntity().getContent());
return IOUtils.toString(response.getEntity().getContent()); }
} }
private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception { private static <T> T doHTTPRequest(final HttpUriRequest r, Class<T> clazz) throws Exception {

View File

@ -46,7 +46,7 @@ public class Vocabulary implements Serializable {
} }
public VocabularyTerm getTerm(final String id) { public VocabularyTerm getTerm(final String id) {
return Optional.ofNullable(id).map(s -> s.toLowerCase()).map(s -> terms.get(s)).orElse(null); return Optional.ofNullable(id).map(String::toLowerCase).map(terms::get).orElse(null);
} }
protected void addTerm(final String id, final String name) { protected void addTerm(final String id, final String name) {
@ -81,7 +81,6 @@ public class Vocabulary implements Serializable {
.ofNullable(getTermBySynonym(syn)) .ofNullable(getTermBySynonym(syn))
.map(term -> getTermAsQualifier(term.getId())) .map(term -> getTermAsQualifier(term.getId()))
.orElse(null); .orElse(null);
// .orElse(OafMapperUtils.unknown(getId(), getName()));
} }
} }

View File

@ -46,7 +46,6 @@ public class VocabularyGroup implements Serializable {
} }
vocs.addTerm(vocId, termId, termName); vocs.addTerm(vocId, termId, termName);
// vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -58,7 +57,6 @@ public class VocabularyGroup implements Serializable {
final String syn = arr[2].trim(); final String syn = arr[2].trim();
vocs.addSynonyms(vocId, termId, syn); vocs.addSynonyms(vocId, termId, syn);
// vocs.addSynonyms(vocId, termId, termId);
} }
} }
@ -98,7 +96,7 @@ public class VocabularyGroup implements Serializable {
.getTerms() .getTerms()
.values() .values()
.stream() .stream()
.map(t -> t.getId()) .map(VocabularyTerm::getId)
.collect(Collectors.toCollection(HashSet::new)); .collect(Collectors.toCollection(HashSet::new));
} }
@ -154,16 +152,19 @@ public class VocabularyGroup implements Serializable {
return Optional return Optional
.ofNullable(vocId) .ofNullable(vocId)
.map(String::toLowerCase) .map(String::toLowerCase)
.map(id -> vocs.containsKey(id)) .map(vocs::containsKey)
.orElse(false); .orElse(false);
} }
private void addSynonyms(final String vocId, final String termId, final String syn) { private void addSynonyms(final String vocId, final String termId, final String syn) {
String id = Optional String id = Optional
.ofNullable(vocId) .ofNullable(vocId)
.map(s -> s.toLowerCase()) .map(String::toLowerCase)
.orElseThrow( .orElseThrow(
() -> new IllegalArgumentException(String.format("empty vocabulary id for [term:%s, synonym:%s]"))); () -> new IllegalArgumentException(
String
.format(
"empty vocabulary id for [term:%s, synonym:%s]", termId, syn)));
Optional Optional
.ofNullable(vocs.get(id)) .ofNullable(vocs.get(id))
.orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId)) .orElseThrow(() -> new IllegalArgumentException("missing vocabulary id: " + vocId))

View File

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.message; package eu.dnetlib.dhp.message;
import java.io.Serializable; import java.io.Serializable;
import java.util.HashMap;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
@ -10,8 +9,8 @@ public class Message implements Serializable {
private static final long serialVersionUID = 401753881204524893L; private static final long serialVersionUID = 401753881204524893L;
public static String CURRENT_PARAM = "current"; public static final String CURRENT_PARAM = "current";
public static String TOTAL_PARAM = "total"; public static final String TOTAL_PARAM = "total";
private MessageType messageType; private MessageType messageType;

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.merge;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
@ -19,6 +18,9 @@ public class AuthorMerger {
private static final Double THRESHOLD = 0.95; private static final Double THRESHOLD = 0.95;
private AuthorMerger() {
}
public static List<Author> merge(List<List<Author>> authors) { public static List<Author> merge(List<List<Author>> authors) {
authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2))); authors.sort((o1, o2) -> -Integer.compare(countAuthorsPids(o1), countAuthorsPids(o2)));
@ -36,7 +38,8 @@ public class AuthorMerger {
public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) { public static List<Author> mergeAuthor(final List<Author> a, final List<Author> b, Double threshold) {
int pa = countAuthorsPids(a); int pa = countAuthorsPids(a);
int pb = countAuthorsPids(b); int pb = countAuthorsPids(b);
List<Author> base, enrich; List<Author> base;
List<Author> enrich;
int sa = authorsSize(a); int sa = authorsSize(a);
int sb = authorsSize(b); int sb = authorsSize(b);
@ -62,7 +65,7 @@ public class AuthorMerger {
// <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list) // <pidComparableString, Author> (if an Author has more than 1 pid, it appears 2 times in the list)
final Map<String, Author> basePidAuthorMap = base final Map<String, Author> basePidAuthorMap = base
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
@ -74,7 +77,7 @@ public class AuthorMerger {
// <pid, Author> (list of pid that are missing in the other list) // <pid, Author> (list of pid that are missing in the other list)
final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich final List<Tuple2<StructuredProperty, Author>> pidToEnrich = enrich
.stream() .stream()
.filter(a -> a.getPid() != null && a.getPid().size() > 0) .filter(a -> a.getPid() != null && !a.getPid().isEmpty())
.flatMap( .flatMap(
a -> a a -> a
.getPid() .getPid()
@ -117,9 +120,9 @@ public class AuthorMerger {
} }
public static String pidToComparableString(StructuredProperty pid) { public static String pidToComparableString(StructuredProperty pid) {
return (pid.getQualifier() != null final String classid = pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase()
? pid.getQualifier().getClassid() != null ? pid.getQualifier().getClassid().toLowerCase() : "" : "";
: "") return (pid.getQualifier() != null ? classid : "")
+ (pid.getValue() != null ? pid.getValue().toLowerCase() : ""); + (pid.getValue() != null ? pid.getValue().toLowerCase() : "");
} }

View File

@ -12,6 +12,9 @@ import com.ximpleware.VTDNav;
/** Created by sandro on 9/29/16. */ /** Created by sandro on 9/29/16. */
public class VtdUtilityParser { public class VtdUtilityParser {
private VtdUtilityParser() {
}
public static List<Node> getTextValuesWithAttributes( public static List<Node> getTextValuesWithAttributes(
final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes) final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
throws VtdException { throws VtdException {

View File

@ -7,22 +7,19 @@ import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException; import java.time.format.DateTimeParseException;
import java.util.*; import java.util.*;
import java.util.function.Function; import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import com.github.sisyphsu.dateparser.DateParserUtils; import com.github.sisyphsu.dateparser.DateParserUtils;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import me.xuender.unidecode.Unidecode;
public class GraphCleaningFunctions extends CleaningFunctions { public class GraphCleaningFunctions extends CleaningFunctions {
@ -194,11 +191,15 @@ public class GraphCleaningFunctions extends CleaningFunctions {
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(sp -> StringUtils.isNotBlank(sp.getValue())) .filter(sp -> StringUtils.isNotBlank(sp.getValue()))
.filter( .filter(
sp -> sp sp -> {
final String title = sp
.getValue() .getValue()
.toLowerCase() .toLowerCase();
.replaceAll(TITLE_FILTER_REGEX, "") final String residual = Unidecode
.length() > TITLE_FILTER_RESIDUAL_LENGTH) .decode(title)
.replaceAll(TITLE_FILTER_REGEX, "");
return residual.length() > TITLE_FILTER_RESIDUAL_LENGTH;
})
.map(GraphCleaningFunctions::cleanValue) .map(GraphCleaningFunctions::cleanValue)
.collect(Collectors.toList())); .collect(Collectors.toList()));
} }
@ -283,7 +284,7 @@ public class GraphCleaningFunctions extends CleaningFunctions {
r r
.getAuthor() .getAuthor()
.stream() .stream()
.filter(a -> Objects.nonNull(a)) .filter(Objects::nonNull)
.filter(a -> StringUtils.isNotBlank(a.getFullname())) .filter(a -> StringUtils.isNotBlank(a.getFullname()))
.filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", ""))) .filter(a -> StringUtils.isNotBlank(a.getFullname().replaceAll("[\\W]", "")))
.collect(Collectors.toList())); .collect(Collectors.toList()));

View File

@ -17,13 +17,16 @@ import eu.dnetlib.dhp.schema.oaf.*;
public class OafMapperUtils { public class OafMapperUtils {
private OafMapperUtils() {
}
public static Oaf merge(final Oaf left, final Oaf right) { public static Oaf merge(final Oaf left, final Oaf right) {
if (ModelSupport.isSubClass(left, OafEntity.class)) { if (ModelSupport.isSubClass(left, OafEntity.class)) {
return mergeEntities((OafEntity) left, (OafEntity) right); return mergeEntities((OafEntity) left, (OafEntity) right);
} else if (ModelSupport.isSubClass(left, Relation.class)) { } else if (ModelSupport.isSubClass(left, Relation.class)) {
((Relation) left).mergeFrom((Relation) right); ((Relation) left).mergeFrom((Relation) right);
} else { } else {
throw new RuntimeException("invalid Oaf type:" + left.getClass().getCanonicalName()); throw new IllegalArgumentException("invalid Oaf type:" + left.getClass().getCanonicalName());
} }
return left; return left;
} }
@ -38,7 +41,7 @@ public class OafMapperUtils {
} else if (ModelSupport.isSubClass(left, Project.class)) { } else if (ModelSupport.isSubClass(left, Project.class)) {
left.mergeFrom(right); left.mergeFrom(right);
} else { } else {
throw new RuntimeException("invalid OafEntity subtype:" + left.getClass().getCanonicalName()); throw new IllegalArgumentException("invalid OafEntity subtype:" + left.getClass().getCanonicalName());
} }
return left; return left;
} }
@ -62,7 +65,7 @@ public class OafMapperUtils {
public static List<KeyValue> listKeyValues(final String... s) { public static List<KeyValue> listKeyValues(final String... s) {
if (s.length % 2 > 0) { if (s.length % 2 > 0) {
throw new RuntimeException("Invalid number of parameters (k,v,k,v,....)"); throw new IllegalArgumentException("Invalid number of parameters (k,v,k,v,....)");
} }
final List<KeyValue> list = new ArrayList<>(); final List<KeyValue> list = new ArrayList<>();
@ -88,7 +91,7 @@ public class OafMapperUtils {
.stream(values) .stream(values)
.map(v -> field(v, info)) .map(v -> field(v, info))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue())) .filter(distinctByKey(Field::getValue))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@ -97,7 +100,7 @@ public class OafMapperUtils {
.stream() .stream()
.map(v -> field(v, info)) .map(v -> field(v, info))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.filter(distinctByKey(f -> f.getValue())) .filter(distinctByKey(Field::getValue))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
@ -342,10 +345,10 @@ public class OafMapperUtils {
if (instanceList != null) { if (instanceList != null) {
final Optional<AccessRight> min = instanceList final Optional<AccessRight> min = instanceList
.stream() .stream()
.map(i -> i.getAccessright()) .map(Instance::getAccessright)
.min(new AccessRightComparator<>()); .min(new AccessRightComparator<>());
final Qualifier rights = min.isPresent() ? qualifier(min.get()) : new Qualifier(); final Qualifier rights = min.map(OafMapperUtils::qualifier).orElseGet(Qualifier::new);
if (StringUtils.isBlank(rights.getClassid())) { if (StringUtils.isBlank(rights.getClassid())) {
rights.setClassid(UNKNOWN); rights.setClassid(UNKNOWN);

View File

@ -34,6 +34,9 @@ public class DHPUtils {
private static final Logger log = LoggerFactory.getLogger(DHPUtils.class); private static final Logger log = LoggerFactory.getLogger(DHPUtils.class);
private DHPUtils() {
}
public static Seq<String> toSeq(List<String> list) { public static Seq<String> toSeq(List<String> list) {
return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq(); return JavaConverters.asScalaIteratorConverter(list.iterator()).asScala().toSeq();
} }
@ -44,7 +47,7 @@ public class DHPUtils {
md.update(s.getBytes(StandardCharsets.UTF_8)); md.update(s.getBytes(StandardCharsets.UTF_8));
return new String(Hex.encodeHex(md.digest())); return new String(Hex.encodeHex(md.digest()));
} catch (final Exception e) { } catch (final Exception e) {
System.err.println("Error creating id"); log.error("Error creating id from {}", s);
return null; return null;
} }
} }
@ -53,33 +56,6 @@ public class DHPUtils {
return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId)); return String.format("%s::%s", nsPrefix, DHPUtils.md5(originalId));
} }
public static String compressString(final String input) {
try (ByteArrayOutputStream out = new ByteArrayOutputStream();
Base64OutputStream b64os = new Base64OutputStream(out)) {
GZIPOutputStream gzip = new GZIPOutputStream(b64os);
gzip.write(input.getBytes(StandardCharsets.UTF_8));
gzip.close();
return out.toString();
} catch (Throwable e) {
return null;
}
}
public static String decompressString(final String input) {
byte[] byteArray = Base64.decodeBase64(input.getBytes());
int len;
try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream((byteArray)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length)) {
byte[] buffer = new byte[1024];
while ((len = gis.read(buffer)) != -1) {
bos.write(buffer, 0, len);
}
return bos.toString();
} catch (Exception e) {
return null;
}
}
public static String getJPathString(final String jsonPath, final String json) { public static String getJPathString(final String jsonPath, final String json) {
try { try {
Object o = JsonPath.read(json, jsonPath); Object o = JsonPath.read(json, jsonPath);

View File

@ -18,13 +18,16 @@ public class ISLookupClientFactory {
private static final int requestTimeout = 60000 * 10; private static final int requestTimeout = 60000 * 10;
private static final int connectTimeout = 60000 * 10; private static final int connectTimeout = 60000 * 10;
private ISLookupClientFactory() {
}
public static ISLookUpService getLookUpService(final String isLookupUrl) { public static ISLookUpService getLookUpService(final String isLookupUrl) {
return getServiceStub(ISLookUpService.class, isLookupUrl); return getServiceStub(ISLookUpService.class, isLookupUrl);
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) { private static <T> T getServiceStub(final Class<T> clazz, final String endpoint) {
log.info(String.format("creating %s stub from %s", clazz.getName(), endpoint)); log.info("creating {} stub from {}", clazz.getName(), endpoint);
final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean(); final JaxWsProxyFactoryBean jaxWsProxyFactory = new JaxWsProxyFactoryBean();
jaxWsProxyFactory.setServiceClass(clazz); jaxWsProxyFactory.setServiceClass(clazz);
jaxWsProxyFactory.setAddress(endpoint); jaxWsProxyFactory.setAddress(endpoint);
@ -38,12 +41,10 @@ public class ISLookupClientFactory {
log log
.info( .info(
String "setting connectTimeout to {}, requestTimeout to {} for service {}",
.format(
"setting connectTimeout to %s, requestTimeout to %s for service %s",
connectTimeout, connectTimeout,
requestTimeout, requestTimeout,
clazz.getCanonicalName())); clazz.getCanonicalName());
policy.setConnectionTimeout(connectTimeout); policy.setConnectionTimeout(connectTimeout);
policy.setReceiveTimeout(requestTimeout); policy.setReceiveTimeout(requestTimeout);

View File

@ -10,7 +10,7 @@ import net.sf.saxon.trans.XPathException;
public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition { public abstract class AbstractExtensionFunction extends ExtensionFunctionDefinition {
public static String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension"; public static final String DEFAULT_SAXON_EXT_NS_URI = "http://www.d-net.research-infrastructures.eu/saxon-extension";
public abstract String getName(); public abstract String getName();

View File

@ -26,7 +26,7 @@ public class ExtractYear extends AbstractExtensionFunction {
@Override @Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
if (arguments == null | arguments.length == 0) { if (arguments == null || arguments.length == 0) {
return new StringValue(""); return new StringValue("");
} }
final Item item = arguments[0].head(); final Item item = arguments[0].head();
@ -63,8 +63,7 @@ public class ExtractYear extends AbstractExtensionFunction {
for (String format : dateFormats) { for (String format : dateFormats) {
try { try {
c.setTime(new SimpleDateFormat(format).parse(s)); c.setTime(new SimpleDateFormat(format).parse(s));
String year = String.valueOf(c.get(Calendar.YEAR)); return String.valueOf(c.get(Calendar.YEAR));
return year;
} catch (ParseException e) { } catch (ParseException e) {
} }
} }

View File

@ -30,7 +30,7 @@ public class NormalizeDate extends AbstractExtensionFunction {
@Override @Override
public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException { public Sequence doCall(XPathContext context, Sequence[] arguments) throws XPathException {
if (arguments == null | arguments.length == 0) { if (arguments == null || arguments.length == 0) {
return new StringValue(BLANK); return new StringValue(BLANK);
} }
String s = arguments[0].head().getStringValue(); String s = arguments[0].head().getStringValue();

View File

@ -1,6 +1,8 @@
package eu.dnetlib.dhp.utils.saxon; package eu.dnetlib.dhp.utils.saxon;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import net.sf.saxon.expr.XPathContext; import net.sf.saxon.expr.XPathContext;
@ -26,7 +28,8 @@ public class PickFirst extends AbstractExtensionFunction {
final String s1 = getValue(arguments[0]); final String s1 = getValue(arguments[0]);
final String s2 = getValue(arguments[1]); final String s2 = getValue(arguments[1]);
return new StringValue(StringUtils.isNotBlank(s1) ? s1 : StringUtils.isNotBlank(s2) ? s2 : ""); final String value = isNotBlank(s1) ? s1 : isNotBlank(s2) ? s2 : "";
return new StringValue(value);
} }
private String getValue(final Sequence arg) throws XPathException { private String getValue(final Sequence arg) throws XPathException {

View File

@ -12,6 +12,9 @@ import net.sf.saxon.TransformerFactoryImpl;
public class SaxonTransformerFactory { public class SaxonTransformerFactory {
private SaxonTransformerFactory() {
}
/** /**
* Creates the index record transformer from the given XSLT * Creates the index record transformer from the given XSLT
* *

View File

@ -7,10 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class ArgumentApplicationParserTest { class ArgumentApplicationParserTest {
@Test @Test
public void testParseParameter() throws Exception { void testParseParameter() throws Exception {
final String jsonConfiguration = IOUtils final String jsonConfiguration = IOUtils
.toString( .toString(
this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json")); this.getClass().getResourceAsStream("/eu/dnetlib/application/parameters.json"));

View File

@ -21,13 +21,13 @@ public class HdfsSupportTest {
class Remove { class Remove {
@Test @Test
public void shouldThrowARuntimeExceptionOnError() { void shouldThrowARuntimeExceptionOnError() {
// when // when
assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration())); assertThrows(RuntimeException.class, () -> HdfsSupport.remove(null, new Configuration()));
} }
@Test @Test
public void shouldRemoveADirFromHDFS(@TempDir Path tempDir) { void shouldRemoveADirFromHDFS(@TempDir Path tempDir) {
// when // when
HdfsSupport.remove(tempDir.toString(), new Configuration()); HdfsSupport.remove(tempDir.toString(), new Configuration());
@ -36,7 +36,7 @@ public class HdfsSupportTest {
} }
@Test @Test
public void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException { void shouldRemoveAFileFromHDFS(@TempDir Path tempDir) throws IOException {
// given // given
Path file = Files.createTempFile(tempDir, "p", "s"); Path file = Files.createTempFile(tempDir, "p", "s");
@ -52,13 +52,13 @@ public class HdfsSupportTest {
class ListFiles { class ListFiles {
@Test @Test
public void shouldThrowARuntimeExceptionOnError() { void shouldThrowARuntimeExceptionOnError() {
// when // when
assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration())); assertThrows(RuntimeException.class, () -> HdfsSupport.listFiles(null, new Configuration()));
} }
@Test @Test
public void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException { void shouldListFilesLocatedInPath(@TempDir Path tempDir) throws IOException {
Path subDir1 = Files.createTempDirectory(tempDir, "list_me"); Path subDir1 = Files.createTempDirectory(tempDir, "list_me");
Path subDir2 = Files.createTempDirectory(tempDir, "list_me"); Path subDir2 = Files.createTempDirectory(tempDir, "list_me");

View File

@ -5,10 +5,10 @@ import static org.junit.jupiter.api.Assertions.*;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class PacePersonTest { class PacePersonTest {
@Test @Test
public void pacePersonTest1() { void pacePersonTest1() {
PacePerson p = new PacePerson("Artini, Michele", false); PacePerson p = new PacePerson("Artini, Michele", false);
assertEquals("Artini", p.getSurnameString()); assertEquals("Artini", p.getSurnameString());
@ -17,7 +17,7 @@ public class PacePersonTest {
} }
@Test @Test
public void pacePersonTest2() { void pacePersonTest2() {
PacePerson p = new PacePerson("Michele G. Artini", false); PacePerson p = new PacePerson("Michele G. Artini", false);
assertEquals("Artini, Michele G.", p.getNormalisedFullname()); assertEquals("Artini, Michele G.", p.getNormalisedFullname());
assertEquals("Michele G", p.getNameString()); assertEquals("Michele G", p.getNameString());

View File

@ -18,7 +18,8 @@ public class SparkSessionSupportTest {
class RunWithSparkSession { class RunWithSparkSession {
@Test @Test
public void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged() @SuppressWarnings("unchecked")
void shouldExecuteFunctionAndNotStopSparkSessionWhenSparkSessionIsNotManaged()
throws Exception { throws Exception {
// given // given
SparkSession spark = mock(SparkSession.class); SparkSession spark = mock(SparkSession.class);
@ -37,7 +38,8 @@ public class SparkSessionSupportTest {
} }
@Test @Test
public void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged() @SuppressWarnings("unchecked")
void shouldExecuteFunctionAndStopSparkSessionWhenSparkSessionIsManaged()
throws Exception { throws Exception {
// given // given
SparkSession spark = mock(SparkSession.class); SparkSession spark = mock(SparkSession.class);

View File

@ -12,7 +12,7 @@ import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@Disabled @Disabled
public class ZenodoAPIClientTest { class ZenodoAPIClientTest {
private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions"; private final String URL_STRING = "https://sandbox.zenodo.org/api/deposit/depositions";
private final String ACCESS_TOKEN = ""; private final String ACCESS_TOKEN = "";
@ -22,7 +22,7 @@ public class ZenodoAPIClientTest {
private final String depositionId = "674915"; private final String depositionId = "674915";
@Test @Test
public void testUploadOldDeposition() throws IOException, MissingConceptDoiException { void testUploadOldDeposition() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId)); Assertions.assertEquals(200, client.uploadOpenDeposition(depositionId));
@ -44,7 +44,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewDeposition() throws IOException { void testNewDeposition() throws IOException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
@ -67,7 +67,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewVersionNewName() throws IOException, MissingConceptDoiException { void testNewVersionNewName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);
@ -87,7 +87,7 @@ public class ZenodoAPIClientTest {
} }
@Test @Test
public void testNewVersionOldName() throws IOException, MissingConceptDoiException { void testNewVersionOldName() throws IOException, MissingConceptDoiException {
ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING, ZenodoAPIClient client = new ZenodoAPIClient(URL_STRING,
ACCESS_TOKEN); ACCESS_TOKEN);

View File

@ -21,7 +21,7 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
import eu.dnetlib.pace.util.MapDocumentUtil; import eu.dnetlib.pace.util.MapDocumentUtil;
import scala.Tuple2; import scala.Tuple2;
public class AuthorMergerTest { class AuthorMergerTest {
private String publicationsBasePath; private String publicationsBasePath;
@ -43,7 +43,7 @@ public class AuthorMergerTest {
} }
@Test @Test
public void mergeTest() { // used in the dedup: threshold set to 0.95 void mergeTest() { // used in the dedup: threshold set to 0.95
for (List<Author> authors1 : authors) { for (List<Author> authors1 : authors) {
System.out.println("List " + (authors.indexOf(authors1) + 1)); System.out.println("List " + (authors.indexOf(authors1) + 1));

View File

@ -4,12 +4,8 @@ package eu.dnetlib.dhp.schema.oaf.utils;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
import java.io.IOException; import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -19,15 +15,34 @@ import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Result;
import me.xuender.unidecode.Unidecode;
public class OafMapperUtilsTest { class OafMapperUtilsTest {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test @Test
public void testDateValidation() { public void testUnidecode() {
assertEquals("Liu Ben Mu hiruzuSen tawa", Unidecode.decode("六本木ヒルズ森タワ"));
assertEquals("Nan Wu A Mi Tuo Fo", Unidecode.decode("南无阿弥陀佛"));
assertEquals("Yi Tiao Hui Zou Lu De Yu", Unidecode.decode("一条会走路的鱼"));
assertEquals("amidaniyorai", Unidecode.decode("あみだにょらい"));
assertEquals("T`owrk`iayi", Unidecode.decode("Թուրքիայի"));
assertEquals("Obzor tematiki", Unidecode.decode("Обзор тематики"));
assertEquals("GERMANSKIE IaZYKI", Unidecode.decode("ГЕРМАНСКИЕ ЯЗЫКИ"));
assertEquals("Diereunese tes ikanopoieses", Unidecode.decode("Διερεύνηση της ικανοποίησης"));
assertEquals("lqDy l'wly@", Unidecode.decode("القضايا الأولية"));
assertEquals("abc def ghi", Unidecode.decode("abc def ghi"));
}
@Test
void testDateValidation() {
assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2016-05-07T12:41:19.202Z ").isPresent());
assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent()); assertTrue(GraphCleaningFunctions.doCleanDate("2020-09-10 11:08:52 ").isPresent());
@ -132,44 +147,46 @@ public class OafMapperUtilsTest {
} }
@Test @Test
public void testDate() { void testDate() {
System.out.println(GraphCleaningFunctions.cleanDate("23-FEB-1998")); final String date = GraphCleaningFunctions.cleanDate("23-FEB-1998");
assertNotNull(date);
System.out.println(date);
} }
@Test @Test
public void testMergePubs() throws IOException { void testMergePubs() throws IOException {
Publication p1 = read("publication_1.json", Publication.class); Publication p1 = read("publication_1.json", Publication.class);
Publication p2 = read("publication_2.json", Publication.class); Publication p2 = read("publication_2.json", Publication.class);
Dataset d1 = read("dataset_1.json", Dataset.class); Dataset d1 = read("dataset_1.json", Dataset.class);
Dataset d2 = read("dataset_2.json", Dataset.class); Dataset d2 = read("dataset_2.json", Dataset.class);
assertEquals(p1.getCollectedfrom().size(), 1); assertEquals(1, p1.getCollectedfrom().size());
assertEquals(p1.getCollectedfrom().get(0).getKey(), ModelConstants.CROSSREF_ID); assertEquals(ModelConstants.CROSSREF_ID, p1.getCollectedfrom().get(0).getKey());
assertEquals(d2.getCollectedfrom().size(), 1); assertEquals(1, d2.getCollectedfrom().size());
assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertFalse(cfId(d2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertTrue( assertEquals(
ModelConstants.PUBLICATION_RESULTTYPE_CLASSID,
OafMapperUtils OafMapperUtils
.mergeResults(p1, d2) .mergeResults(p1, d2)
.getResulttype() .getResulttype()
.getClassid() .getClassid());
.equals(ModelConstants.PUBLICATION_RESULTTYPE_CLASSID));
assertEquals(p2.getCollectedfrom().size(), 1); assertEquals(1, p2.getCollectedfrom().size());
assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertFalse(cfId(p2.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertEquals(d1.getCollectedfrom().size(), 1); assertEquals(1, d1.getCollectedfrom().size());
assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID));
assertTrue( assertEquals(
ModelConstants.DATASET_RESULTTYPE_CLASSID,
OafMapperUtils OafMapperUtils
.mergeResults(p2, d1) .mergeResults(p2, d1)
.getResulttype() .getResulttype()
.getClassid() .getClassid());
.equals(ModelConstants.DATASET_RESULTTYPE_CLASSID));
} }
protected HashSet<String> cfId(List<KeyValue> collectedfrom) { protected HashSet<String> cfId(List<KeyValue> collectedfrom) {
return collectedfrom.stream().map(c -> c.getKey()).collect(Collectors.toCollection(HashSet::new)); return collectedfrom.stream().map(KeyValue::getKey).collect(Collectors.toCollection(HashSet::new));
} }
protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException { protected <T extends Result> T read(String filename, Class<T> clazz) throws IOException {

View File

@ -3,10 +3,10 @@ package eu.dnetlib.scholexplorer.relation;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
public class RelationMapperTest { class RelationMapperTest {
@Test @Test
public void testLoadRels() throws Exception { void testLoadRels() throws Exception {
RelationMapper relationMapper = RelationMapper.load(); RelationMapper relationMapper = RelationMapper.load();
relationMapper.keySet().forEach(System.out::println); relationMapper.keySet().forEach(System.out::println);

View File

@ -3,40 +3,37 @@ package eu.dnetlib.dhp.actionmanager;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.*; import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Triple; import org.apache.commons.lang3.tuple.Triple;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import com.google.common.base.Splitter; import com.google.common.base.Splitter;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.actionmanager.rmi.ActionManagerException; import eu.dnetlib.actionmanager.rmi.ActionManagerException;
import eu.dnetlib.actionmanager.set.ActionManagerSet;
import eu.dnetlib.actionmanager.set.ActionManagerSet.ImpactTypes;
import eu.dnetlib.dhp.actionmanager.partition.PartitionActionSetsByPayloadTypeJob;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
public class ISClient implements Serializable { public class ISClient implements Serializable {
private static final Logger log = LoggerFactory.getLogger(PartitionActionSetsByPayloadTypeJob.class); private static final Logger log = LoggerFactory.getLogger(ISClient.class);
private static final String INPUT_ACTION_SET_ID_SEPARATOR = ","; private static final String INPUT_ACTION_SET_ID_SEPARATOR = ",";
private final ISLookUpService isLookup; private final transient ISLookUpService isLookup;
public ISClient(String isLookupUrl) { public ISClient(String isLookupUrl) {
isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl); isLookup = ISLookupClientFactory.getLookUpService(isLookupUrl);
@ -63,7 +60,7 @@ public class ISClient implements Serializable {
.map( .map(
sets -> sets sets -> sets
.stream() .stream()
.map(set -> parseSetInfo(set)) .map(ISClient::parseSetInfo)
.filter(t -> ids.contains(t.getLeft())) .filter(t -> ids.contains(t.getLeft()))
.map(t -> buildDirectory(basePath, t)) .map(t -> buildDirectory(basePath, t))
.collect(Collectors.toList())) .collect(Collectors.toList()))
@ -73,15 +70,17 @@ public class ISClient implements Serializable {
} }
} }
private Triple<String, String, String> parseSetInfo(String set) { private static Triple<String, String, String> parseSetInfo(String set) {
try { try {
Document doc = new SAXReader().read(new StringReader(set)); final SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
Document doc = reader.read(new StringReader(set));
return Triple return Triple
.of( .of(
doc.valueOf("//SET/@id"), doc.valueOf("//SET/@id"),
doc.valueOf("//SET/@directory"), doc.valueOf("//SET/@directory"),
doc.valueOf("//SET/@latest")); doc.valueOf("//SET/@latest"));
} catch (DocumentException e) { } catch (DocumentException | SAXException e) {
throw new IllegalStateException(e); throw new IllegalStateException(e);
} }
} }
@ -99,7 +98,7 @@ public class ISClient implements Serializable {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + propertyName
+ "']/@value/string()"; + "']/@value/string()";
log.debug("quering for service property: " + q); log.debug("quering for service property: {}", q);
try { try {
final List<String> value = isLookup.quickSearchProfile(q); final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value); return Iterables.getOnlyElement(value);

View File

@ -62,6 +62,7 @@ public class MergeAndGet {
x.getClass().getCanonicalName(), y.getClass().getCanonicalName())); x.getClass().getCanonicalName(), y.getClass().getCanonicalName()));
} }
@SuppressWarnings("unchecked")
private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) { private static <G extends Oaf, A extends Oaf> G selectNewerAndGet(G x, A y) {
if (x.getClass().equals(y.getClass()) if (x.getClass().equals(y.getClass())
&& x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) { && x.getLastupdatetimestamp() > y.getLastupdatetimestamp()) {

View File

@ -74,7 +74,9 @@ public class PromoteActionPayloadForGraphTableJob {
.orElse(true); .orElse(true);
logger.info("shouldGroupById: {}", shouldGroupById); logger.info("shouldGroupById: {}", shouldGroupById);
@SuppressWarnings("unchecked")
Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName); Class<? extends Oaf> rowClazz = (Class<? extends Oaf>) Class.forName(graphTableClassName);
@SuppressWarnings("unchecked")
Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName); Class<? extends Oaf> actionPayloadClazz = (Class<? extends Oaf>) Class.forName(actionPayloadClassName);
throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz); throwIfGraphTableClassIsNotSubClassOfActionPayloadClass(rowClazz, actionPayloadClazz);
@ -152,7 +154,7 @@ public class PromoteActionPayloadForGraphTableJob {
return spark return spark
.read() .read()
.parquet(path) .parquet(path)
.map((MapFunction<Row, String>) value -> extractPayload(value), Encoders.STRING()) .map((MapFunction<Row, String>) PromoteActionPayloadForGraphTableJob::extractPayload, Encoders.STRING())
.map( .map(
(MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value), (MapFunction<String, A>) value -> decodePayload(actionPayloadClazz, value),
Encoders.bean(actionPayloadClazz)); Encoders.bean(actionPayloadClazz));

View File

@ -80,7 +80,7 @@ public class PartitionActionSetsByPayloadTypeJobTest {
private ISClient isClient; private ISClient isClient;
@Test @Test
public void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception { void shouldPartitionActionSetsByPayloadType(@TempDir Path workingDir) throws Exception {
// given // given
Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets"); Path inputActionSetsBaseDir = workingDir.resolve("input").resolve("action_sets");
Path outputDir = workingDir.resolve("output"); Path outputDir = workingDir.resolve("output");

View File

@ -20,7 +20,7 @@ public class MergeAndGetTest {
class MergeFromAndGetStrategy { class MergeFromAndGetStrategy {
@Test @Test
public void shouldThrowForOafAndOaf() { void shouldThrowForOafAndOaf() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -33,7 +33,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafAndRelation() { void shouldThrowForOafAndRelation() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -46,7 +46,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafAndOafEntity() { void shouldThrowForOafAndOafEntity() {
// given // given
Oaf a = mock(Oaf.class); Oaf a = mock(Oaf.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -59,7 +59,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOaf() { void shouldThrowForRelationAndOaf() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -72,7 +72,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -85,7 +85,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldBehaveProperlyForRelationAndRelation() { void shouldBehaveProperlyForRelationAndRelation() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -101,7 +101,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndOaf() { void shouldThrowForOafEntityAndOaf() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Oaf b = mock(Oaf.class); Oaf b = mock(Oaf.class);
@ -114,7 +114,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -127,7 +127,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndOafEntityButNotSubclasses() { void shouldThrowForOafEntityAndOafEntityButNotSubclasses() {
// given // given
class OafEntitySub1 extends OafEntity { class OafEntitySub1 extends OafEntity {
} }
@ -145,7 +145,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldBehaveProperlyForOafEntityAndOafEntity() { void shouldBehaveProperlyForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -165,7 +165,7 @@ public class MergeAndGetTest {
class SelectNewerAndGetStrategy { class SelectNewerAndGetStrategy {
@Test @Test
public void shouldThrowForOafEntityAndRelation() { void shouldThrowForOafEntityAndRelation() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Relation b = mock(Relation.class); Relation b = mock(Relation.class);
@ -178,7 +178,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForRelationAndOafEntity() { void shouldThrowForRelationAndOafEntity() {
// given // given
Relation a = mock(Relation.class); Relation a = mock(Relation.class);
OafEntity b = mock(OafEntity.class); OafEntity b = mock(OafEntity.class);
@ -191,7 +191,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowForOafEntityAndResult() { void shouldThrowForOafEntityAndResult() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
Result b = mock(Result.class); Result b = mock(Result.class);
@ -204,7 +204,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() { void shouldThrowWhenSuperTypeIsNewerForResultAndOafEntity() {
// given // given
// real types must be used because subclass-superclass resolution does not work for // real types must be used because subclass-superclass resolution does not work for
// mocks // mocks
@ -221,7 +221,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldShouldReturnLeftForOafEntityAndOafEntity() { void shouldShouldReturnLeftForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
when(a.getLastupdatetimestamp()).thenReturn(1L); when(a.getLastupdatetimestamp()).thenReturn(1L);
@ -238,7 +238,7 @@ public class MergeAndGetTest {
} }
@Test @Test
public void shouldShouldReturnRightForOafEntityAndOafEntity() { void shouldShouldReturnRightForOafEntityAndOafEntity() {
// given // given
OafEntity a = mock(OafEntity.class); OafEntity a = mock(OafEntity.class);
when(a.getLastupdatetimestamp()).thenReturn(2L); when(a.getLastupdatetimestamp()).thenReturn(2L);

View File

@ -77,7 +77,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
class Main { class Main {
@Test @Test
public void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() { void shouldThrowWhenGraphTableClassIsNotASubClassOfActionPayloadClass() {
// given // given
Class<Relation> rowClazz = Relation.class; Class<Relation> rowClazz = Relation.class;
Class<OafEntity> actionPayloadClazz = OafEntity.class; Class<OafEntity> actionPayloadClazz = OafEntity.class;
@ -116,7 +116,7 @@ public class PromoteActionPayloadForGraphTableJobTest {
@ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}") @ParameterizedTest(name = "strategy: {0}, graph table: {1}, action payload: {2}")
@MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams") @MethodSource("eu.dnetlib.dhp.actionmanager.promote.PromoteActionPayloadForGraphTableJobTest#promoteJobTestParams")
public void shouldPromoteActionPayloadForGraphTable( void shouldPromoteActionPayloadForGraphTable(
MergeAndGet.Strategy strategy, MergeAndGet.Strategy strategy,
Class<? extends Oaf> rowClazz, Class<? extends Oaf> rowClazz,
Class<? extends Oaf> actionPayloadClazz) Class<? extends Oaf> actionPayloadClazz)

View File

@ -44,7 +44,7 @@ public class PromoteActionPayloadFunctionsTest {
class JoinTableWithActionPayloadAndMerge { class JoinTableWithActionPayloadAndMerge {
@Test @Test
public void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() { void shouldThrowWhenTableTypeIsNotSubtypeOfActionPayloadType() {
// given // given
class OafImpl extends Oaf { class OafImpl extends Oaf {
} }
@ -58,7 +58,7 @@ public class PromoteActionPayloadFunctionsTest {
} }
@Test @Test
public void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() { void shouldRunProperlyWhenActionPayloadTypeAndTableTypeAreTheSame() {
// given // given
String id0 = "id0"; String id0 = "id0";
String id1 = "id1"; String id1 = "id1";
@ -138,7 +138,7 @@ public class PromoteActionPayloadFunctionsTest {
} }
@Test @Test
public void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() { void shouldRunProperlyWhenActionPayloadTypeIsSuperTypeOfTableType() {
// given // given
String id0 = "id0"; String id0 = "id0";
String id1 = "id1"; String id1 = "id1";
@ -218,7 +218,7 @@ public class PromoteActionPayloadFunctionsTest {
class GroupTableByIdAndMerge { class GroupTableByIdAndMerge {
@Test @Test
public void shouldRunProperly() { void shouldRunProperly() {
// given // given
String id1 = "id1"; String id1 = "id1";
String id2 = "id2"; String id2 = "id2";

View File

@ -84,14 +84,6 @@
<artifactId>json</artifactId> <artifactId>json</artifactId>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-csv -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.8</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency> <dependency>
<groupId>org.apache.poi</groupId> <groupId>org.apache.poi</groupId>

View File

@ -4,6 +4,7 @@ package eu.dnetlib.dhp.actionmanager.bipfinder;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -28,15 +29,16 @@ import eu.dnetlib.dhp.schema.oaf.Result;
public class CollectAndSave implements Serializable { public class CollectAndSave implements Serializable {
private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class); private static final Logger log = LoggerFactory.getLogger(CollectAndSave.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static <I extends Result> void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
String jsonConfiguration = IOUtils String jsonConfiguration = IOUtils
.toString( .toString(
Objects
.requireNonNull(
CollectAndSave.class CollectAndSave.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")); "/eu/dnetlib/dhp/actionmanager/bipfinder/input_actionset_parameter.json")));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);

View File

@ -87,7 +87,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath, private static <I extends Result> void prepareResults(SparkSession spark, String inputPath, String outputPath,
String bipScorePath, Class<I> inputClazz) { String bipScorePath, Class<I> inputClazz) {
final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc JavaRDD<BipDeserialize> bipDeserializeJavaRDD = sc
.textFile(bipScorePath) .textFile(bipScorePath)
@ -101,8 +101,6 @@ public class SparkAtomicActionScoreJob implements Serializable {
return bs; return bs;
}).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class)); }).collect(Collectors.toList()).iterator()).rdd(), Encoders.bean(BipScore.class));
System.out.println(bipScores.count());
Dataset<I> results = readPath(spark, inputPath, inputClazz); Dataset<I> results = readPath(spark, inputPath, inputClazz);
results.createOrReplaceTempView("result"); results.createOrReplaceTempView("result");
@ -124,7 +122,7 @@ public class SparkAtomicActionScoreJob implements Serializable {
ret.setId(value._2().getId()); ret.setId(value._2().getId());
return ret; return ret;
}, Encoders.bean(BipScore.class)) }, Encoders.bean(BipScore.class))
.groupByKey((MapFunction<BipScore, String>) value -> value.getId(), Encoders.STRING()) .groupByKey((MapFunction<BipScore, String>) BipScore::getId, Encoders.STRING())
.mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> { .mapGroups((MapGroupsFunction<String, BipScore, Result>) (k, it) -> {
Result ret = new Result(); Result ret = new Result();
ret.setDataInfo(getDataInfo()); ret.setDataInfo(getDataInfo());

View File

@ -1,12 +1,14 @@
package eu.dnetlib.dhp.actionmanager.datacite package eu.dnetlib.dhp.actionmanager.datacite
import org.apache.commons.io.IOUtils import org.apache.commons.io.IOUtils
import org.apache.http.client.config.RequestConfig
import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest} import org.apache.http.client.methods.{HttpGet, HttpPost, HttpRequestBase, HttpUriRequest}
import org.apache.http.entity.StringEntity import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.HttpClients import org.apache.http.impl.client.{HttpClientBuilder, HttpClients}
import java.io.IOException import java.io.IOException
abstract class AbstractRestClient extends Iterator[String] { abstract class AbstractRestClient extends Iterator[String] {
var buffer: List[String] = List() var buffer: List[String] = List()
@ -53,15 +55,18 @@ abstract class AbstractRestClient extends Iterator[String]{
} }
private def doHTTPRequest[A <: HttpUriRequest](r: A): String = { private def doHTTPRequest[A <: HttpUriRequest](r: A): String = {
val client = HttpClients.createDefault val timeout = 60; // seconds
var tries = 4 val config = RequestConfig.custom()
.setConnectTimeout(timeout * 1000)
.setConnectionRequestTimeout(timeout * 1000)
.setSocketTimeout(timeout * 1000).build()
val client = HttpClientBuilder.create().setDefaultRequestConfig(config).build()
try { try {
var tries = 4
while (tries > 0) { while (tries > 0) {
println(s"requesting ${r.getURI}") println(s"requesting ${r.getURI}")
try {
val response = client.execute(r) val response = client.execute(r)
println(s"get response with status${response.getStatusLine.getStatusCode}") println(s"get response with status${response.getStatusLine.getStatusCode}")
if (response.getStatusLine.getStatusCode > 400) { if (response.getStatusLine.getStatusCode > 400) {
@ -69,18 +74,19 @@ abstract class AbstractRestClient extends Iterator[String]{
} }
else else
return IOUtils.toString(response.getEntity.getContent) return IOUtils.toString(response.getEntity.getContent)
}
""
} catch { } catch {
case e: Throwable => case e: Throwable =>
throw new RuntimeException("Error on executing request ", e) println(s"Error on requesting ${r.getURI}")
} finally try client.close() e.printStackTrace()
catch { tries -= 1
case e: IOException => }
throw new RuntimeException("Unable to close client ", e) }
""
} finally {
if (client != null)
client.close()
} }
} }
getBufferData() getBufferData()
} }

View File

@ -367,7 +367,7 @@ object DataciteToOAFTransformation {
result.setDateofcollection(ISO8601FORMAT.format(d)) result.setDateofcollection(ISO8601FORMAT.format(d))
result.setDateoftransformation(ISO8601FORMAT.format(ts)) result.setDateoftransformation(ISO8601FORMAT.format(d))
result.setDataInfo(dataInfo) result.setDataInfo(dataInfo)
val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List()) val creators = (json \\ "creators").extractOrElse[List[CreatorType]](List())

View File

@ -140,7 +140,7 @@ object ImportDatacite {
private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = { private def writeSequenceFile(hdfsTargetPath: Path, timestamp: Long, conf: Configuration, bs:Int): Long = {
var from:Long = timestamp * 1000 var from:Long = timestamp * 1000
val delta:Long = 50000000L val delta:Long = 100000000L
var client: DataciteAPIImporter = null var client: DataciteAPIImporter = null
val now :Long =System.currentTimeMillis() val now :Long =System.currentTimeMillis()
var i = 0 var i = 0

View File

@ -0,0 +1,53 @@
package eu.dnetlib.dhp.actionmanager.datacite
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{Oaf, Result}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.LocalFileSystem
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.apache.spark.sql.functions.max
import org.slf4j.{Logger, LoggerFactory}
import java.text.SimpleDateFormat
import java.util.{Date, Locale}
import scala.io.Source
/**
  * Spark entry point that inspects an already generated Datacite dataset and derives the
  * timestamp of the most recently collected record.
  *
  * The job loads the dataset stored under `workingPath`, keeps only the [[Result]] instances,
  * takes the maximum `dateofcollection` string and parses it into epoch milliseconds.
  * NOTE(review): the derived timestamp is only logged here; no download is triggered by this
  * object yet, and `sourcePath` is read but not used.
  */
object SparkDownloadUpdateDatacite {

  val log: Logger = LoggerFactory.getLogger(getClass)

  /**
    * @param args command line arguments, parsed against the
    *             `generate_dataset_params.json` definition found on the classpath
    *             (`master`, `sourcePath`, `workingPath` and `namenode` are read)
    * @throws IllegalArgumentException if the parameter definition is missing from the classpath
    * @throws IllegalStateException    if the dataset contains no usable `dateofcollection`
    */
  def main(args: Array[String]): Unit = {

    val conf = new SparkConf

    val paramsResource = "/eu/dnetlib/dhp/actionmanager/datacite/generate_dataset_params.json"
    val paramsStream = getClass.getResourceAsStream(paramsResource)
    // Fail with a readable message instead of an NPE deep inside Source when the resource is missing.
    require(paramsStream != null, s"Unable to find parameter definition $paramsResource on the classpath")

    // Closing the Source also closes the underlying resource stream.
    val paramsSource = Source.fromInputStream(paramsStream)
    val parser =
      try {
        new ArgumentApplicationParser(paramsSource.mkString)
      } finally {
        paramsSource.close()
      }
    parser.parseArgument(args)

    val master = parser.get("master")
    val sourcePath = parser.get("sourcePath")
    val workingPath = parser.get("workingPath")

    val hdfsuri = parser.get("namenode")
    log.info(s"namenode is $hdfsuri")

    val spark: SparkSession = SparkSession.builder().config(conf)
      .appName(getClass.getSimpleName)
      .master(master)
      .getOrCreate()

    implicit val oafEncoder: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val resEncoder: Encoder[Result] = Encoders.kryo[Result]

    import spark.implicits._

    // The aggregation is a string maximum over the mapped "value" column.
    // NOTE(review): this matches chronological order only if all dates share the same
    // format and offset - confirm against the producer of dateofcollection.
    val maxDate: String = spark.read.load(workingPath).as[Oaf]
      .filter(s => s.isInstanceOf[Result])
      .map(r => r.asInstanceOf[Result].getDateofcollection)
      .select(max("value"))
      .first()
      .getString(0)

    // max() yields null when there is no Result (or no non-null date): report it explicitly
    // rather than letting SimpleDateFormat.parse fail with a NullPointerException.
    if (maxDate == null)
      throw new IllegalStateException(s"No dateofcollection found among the Result records stored in $workingPath")

    // SimpleDateFormat is not thread-safe, hence a method-local instance.
    val ISO8601FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.US)
    val string_to_date = ISO8601FORMAT.parse(maxDate)
    val ts = string_to_date.getTime
    log.info(s"most recent dateofcollection in $workingPath is $maxDate (epoch millis: $ts)")
  }
}

View File

@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2; import scala.Tuple2;
@ -171,26 +171,23 @@ public class PrepareProgramme {
} }
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) { private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
if (!a.getLanguage().equals("en")) { if (!a.getLanguage().equals("en") && b.getLanguage().equalsIgnoreCase("en")) {
if (b.getLanguage().equalsIgnoreCase("en")) {
a.setTitle(b.getTitle()); a.setTitle(b.getTitle());
a.setLanguage(b.getLanguage()); a.setLanguage(b.getLanguage());
} }
} if (StringUtils.isEmpty(a.getShortTitle()) && !StringUtils.isEmpty(b.getShortTitle())) {
if (StringUtils.isEmpty(a.getShortTitle())) {
if (!StringUtils.isEmpty(b.getShortTitle())) {
a.setShortTitle(b.getShortTitle()); a.setShortTitle(b.getShortTitle());
} }
}
return a; return a;
} }
@SuppressWarnings("unchecked")
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) { private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
Object[] codedescription = h2020Programmes Object[] codedescription = h2020Programmes
.map( .map(
value -> new Tuple2<>(value.getCode(), value -> new Tuple2<>(value.getCode(),
new Tuple2<String, String>(value.getTitle(), value.getShortTitle()))) new Tuple2<>(value.getTitle(), value.getShortTitle())))
.collect() .collect()
.toArray(); .toArray();
@ -216,7 +213,7 @@ public class PrepareProgramme {
String[] tmp = ent.split("\\."); String[] tmp = ent.split("\\.");
if (tmp.length <= 2) { if (tmp.length <= 2) {
if (StringUtils.isEmpty(entry._2()._2())) { if (StringUtils.isEmpty(entry._2()._2())) {
map.put(entry._1(), new Tuple2<String, String>(entry._2()._1(), entry._2()._1())); map.put(entry._1(), new Tuple2<>(entry._2()._1(), entry._2()._1()));
} else { } else {
map.put(entry._1(), entry._2()); map.put(entry._1(), entry._2());
} }

View File

@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2; import scala.Tuple2;
@ -29,7 +29,7 @@ import scala.Tuple2;
*/ */
public class PrepareProjects { public class PrepareProjects {
private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); private static final Logger log = LoggerFactory.getLogger(PrepareProjects.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {

View File

@ -31,15 +31,16 @@ import eu.dnetlib.dhp.common.DbClient;
*/ */
public class ReadProjectsFromDB implements Closeable { public class ReadProjectsFromDB implements Closeable {
private final DbClient dbClient;
private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class); private static final Log log = LogFactory.getLog(ReadProjectsFromDB.class);
private static final String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
private final DbClient dbClient;
private final Configuration conf; private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final static String query = "SELECT code " +
"from projects where id like 'corda__h2020%' ";
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
@ -65,9 +66,9 @@ public class ReadProjectsFromDB implements Closeable {
} }
} }
public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) throws Exception { public void execute(final String sql, final Function<ResultSet, List<ProjectSubset>> producer) {
final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeProject(r)); final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(this::writeProject);
dbClient.processResults(sql, consumer); dbClient.processResults(sql, consumer);
} }
@ -94,20 +95,20 @@ public class ReadProjectsFromDB implements Closeable {
public ReadProjectsFromDB( public ReadProjectsFromDB(
final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword) final String hdfsPath, String hdfsNameNode, final String dbUrl, final String dbUser, final String dbPassword)
throws Exception { throws IOException {
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword); this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
this.conf = new Configuration(); this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); this.conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
} }
@Override @Override

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
@ -22,15 +21,16 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic; import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.H2020Classification; import eu.dnetlib.dhp.schema.oaf.H2020Classification;
import eu.dnetlib.dhp.schema.oaf.H2020Programme; import eu.dnetlib.dhp.schema.oaf.H2020Programme;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;
@ -47,13 +47,10 @@ import scala.Tuple2;
* *
* To produce one single entry for each project code a step of grouping is needed: each project can be associated to more * To produce one single entry for each project code a step of grouping is needed: each project can be associated to more
* than one programme. * than one programme.
*
*
*/ */
public class SparkAtomicActionJob { public class SparkAtomicActionJob {
private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class); private static final Logger log = LoggerFactory.getLogger(SparkAtomicActionJob.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final HashMap<String, String> programmeMap = new HashMap<>();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -137,7 +134,6 @@ public class SparkAtomicActionJob {
h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setClassification(csvProgramme.getClassification());
h2020classification.setH2020Programme(pm); h2020classification.setH2020Programme(pm);
setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short());
// setProgramme(h2020classification, ocsvProgramme.get().getClassification());
pp.setH2020classification(Arrays.asList(h2020classification)); pp.setH2020classification(Arrays.asList(h2020classification));
return pp; return pp;
@ -152,20 +148,16 @@ public class SparkAtomicActionJob {
.map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> { .map((MapFunction<Tuple2<Project, EXCELTopic>, Project>) p -> {
Optional<EXCELTopic> op = Optional.ofNullable(p._2()); Optional<EXCELTopic> op = Optional.ofNullable(p._2());
Project rp = p._1(); Project rp = p._1();
if (op.isPresent()) { op.ifPresent(excelTopic -> rp.setH2020topicdescription(excelTopic.getTitle()));
rp.setH2020topicdescription(op.get().getTitle());
}
return rp; return rp;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.filter(Objects::nonNull) .filter(Objects::nonNull)
.groupByKey( .groupByKey(
(MapFunction<Project, String>) p -> p.getId(), (MapFunction<Project, String>) OafEntity::getId,
Encoders.STRING()) Encoders.STRING())
.mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> { .mapGroups((MapGroupsFunction<String, Project, Project>) (s, it) -> {
Project first = it.next(); Project first = it.next();
it.forEachRemaining(p -> { it.forEachRemaining(first::mergeFrom);
first.mergeFrom(p);
});
return first; return first;
}, Encoders.bean(Project.class)) }, Encoders.bean(Project.class))
.toJavaRDD() .toJavaRDD()
@ -189,12 +181,6 @@ public class SparkAtomicActionJob {
h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]); h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
} }
// private static void setProgramme(H2020Classification h2020Classification, String classification) {
// String[] tmp = classification.split(" \\| ");
//
// h2020Classification.getH2020Programme().setDescription(tmp[tmp.length - 1]);
// }
public static <R> Dataset<R> readPath( public static <R> Dataset<R> readPath(
SparkSession spark, String inputPath, Class<R> clazz) { SparkSession spark, String inputPath, Class<R> clazz) {
return spark return spark

View File

@ -1,40 +0,0 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.reflect.FieldUtils;
/**
 * Reads a generic csv and maps it into classes that mirror its schema
 */
public class CSVParser {

	/**
	 * Parses CSV text (Excel dialect, {@code ;} delimiter, {@code "} quote, trimmed values, first
	 * record used as header) and creates one object per record.
	 * <p>
	 * For every record a new instance of {@code classForName} is created through its no-arg
	 * constructor and, for each header, the field with the same name is written with the record
	 * value for that header (private fields included, since access is forced).
	 *
	 * @param csvFile the CSV content itself, not a path: it is handed to the parser as a string
	 * @param classForName fully qualified name of the class to instantiate for each record
	 * @param <R> the type the caller expects; it is NOT verified against {@code classForName}
	 * @return the list of populated instances, in record order
	 * @throws ClassNotFoundException if {@code classForName} cannot be loaded
	 * @throws IOException if the CSV content cannot be parsed
	 * @throws IllegalAccessException if the class or one of its fields is not accessible
	 * @throws InstantiationException if the class cannot be instantiated
	 */
	public <R> List<R> parse(String csvFile, String classForName)
		throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
		final CSVFormat format = CSVFormat.EXCEL
			.withHeader()
			.withDelimiter(';')
			.withQuote('"')
			.withTrim();
		final List<R> ret = new ArrayList<>();
		// The commons-csv parser is Closeable: release it even when mapping a record fails.
		try (org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format)) {
			final Set<String> headers = parser.getHeaderMap().keySet();
			final Class<?> clazz = Class.forName(classForName);
			for (CSVRecord csvRecord : parser.getRecords()) {
				final Object cc = clazz.newInstance();
				for (String header : headers) {
					// NOTE(review): a header without a matching field is presumably rejected by
					// FieldUtils with an IllegalArgumentException - confirm.
					FieldUtils.writeField(cc, header, csvRecord.get(header), true);
				}
				// Unchecked by design: R is chosen by the caller and cannot be checked at runtime.
				@SuppressWarnings("unchecked")
				final R mapped = (R) cc;
				ret.add(mapped);
			}
		}
		return ret;
	}
}

View File

@ -1,200 +0,0 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.Serializable;
/**
 * The model for the projects csv file: a plain serializable bean in which every value is kept
 * as a raw {@link String}, exposed through a getter/setter pair per field.
 */
public class CSVProject implements Serializable {

	private String rcn;

	public String getRcn() {
		return this.rcn;
	}

	public void setRcn(final String rcn) {
		this.rcn = rcn;
	}

	private String id;

	public String getId() {
		return this.id;
	}

	public void setId(final String id) {
		this.id = id;
	}

	private String acronym;

	public String getAcronym() {
		return this.acronym;
	}

	public void setAcronym(final String acronym) {
		this.acronym = acronym;
	}

	private String status;

	public String getStatus() {
		return this.status;
	}

	public void setStatus(final String status) {
		this.status = status;
	}

	private String programme;

	public String getProgramme() {
		return this.programme;
	}

	public void setProgramme(final String programme) {
		this.programme = programme;
	}

	private String topics;

	public String getTopics() {
		return this.topics;
	}

	public void setTopics(final String topics) {
		this.topics = topics;
	}

	private String frameworkProgramme;

	public String getFrameworkProgramme() {
		return this.frameworkProgramme;
	}

	public void setFrameworkProgramme(final String frameworkProgramme) {
		this.frameworkProgramme = frameworkProgramme;
	}

	private String title;

	public String getTitle() {
		return this.title;
	}

	public void setTitle(final String title) {
		this.title = title;
	}

	private String startDate;

	public String getStartDate() {
		return this.startDate;
	}

	public void setStartDate(final String startDate) {
		this.startDate = startDate;
	}

	private String endDate;

	public String getEndDate() {
		return this.endDate;
	}

	public void setEndDate(final String endDate) {
		this.endDate = endDate;
	}

	private String projectUrl;

	public String getProjectUrl() {
		return this.projectUrl;
	}

	public void setProjectUrl(final String projectUrl) {
		this.projectUrl = projectUrl;
	}

	private String objective;

	public String getObjective() {
		return this.objective;
	}

	public void setObjective(final String objective) {
		this.objective = objective;
	}

	private String totalCost;

	public String getTotalCost() {
		return this.totalCost;
	}

	public void setTotalCost(final String totalCost) {
		this.totalCost = totalCost;
	}

	private String ecMaxContribution;

	public String getEcMaxContribution() {
		return this.ecMaxContribution;
	}

	public void setEcMaxContribution(final String ecMaxContribution) {
		this.ecMaxContribution = ecMaxContribution;
	}

	private String call;

	public String getCall() {
		return this.call;
	}

	public void setCall(final String call) {
		this.call = call;
	}

	private String fundingScheme;

	public String getFundingScheme() {
		return this.fundingScheme;
	}

	public void setFundingScheme(final String fundingScheme) {
		this.fundingScheme = fundingScheme;
	}

	private String coordinator;

	public String getCoordinator() {
		return this.coordinator;
	}

	public void setCoordinator(final String coordinator) {
		this.coordinator = coordinator;
	}

	private String coordinatorCountry;

	public String getCoordinatorCountry() {
		return this.coordinatorCountry;
	}

	public void setCoordinatorCountry(final String coordinatorCountry) {
		this.coordinatorCountry = coordinatorCountry;
	}

	private String participants;

	public String getParticipants() {
		return this.participants;
	}

	public void setParticipants(final String participants) {
		this.participants = participants;
	}

	private String participantCountries;

	public String getParticipantCountries() {
		return this.participantCountries;
	}

	public void setParticipantCountries(final String participantCountries) {
		this.participantCountries = participantCountries;
	}

	private String subjects;

	public String getSubjects() {
		return this.subjects;
	}

	public void setSubjects(final String subjects) {
		this.subjects = subjects;
	}
}

View File

@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
/** /**
* Reads a generic excel file and maps it into classes that mirror its schema * Reads a generic excel file and maps it into classes that mirror its schema
*/ */
@ -26,13 +28,12 @@ public class EXCELParser {
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
InvalidFormatException { InvalidFormatException {
OPCPackage pkg = OPCPackage.open(file); try (OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg)) {
XSSFWorkbook wb = new XSSFWorkbook(pkg);
XSSFSheet sheet = wb.getSheet(sheetName); XSSFSheet sheet = wb.getSheet(sheetName);
if (sheetName == null) { if (sheet == null) {
throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file");
} }
List<R> ret = new ArrayList<>(); List<R> ret = new ArrayList<>();
@ -73,5 +74,6 @@ public class EXCELParser {
return ret; return ret;
} }
}
} }

View File

@ -1,34 +1,21 @@
package eu.dnetlib.dhp.actionmanager.project.utils; package eu.dnetlib.dhp.actionmanager.project.utils;
import java.io.BufferedWriter; import java.io.*;
import java.io.Closeable; import java.util.Optional;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.GetCSV;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
/** /**
* Applies the parsing of a csv file and writes the Serialization of it in hdfs * Applies the parsing of a csv file and writes the Serialization of it in hdfs
*/ */
public class ReadCSV implements Closeable { public class ReadCSV {
private static final Log log = LogFactory.getLog(ReadCSV.class);
private final Configuration conf;
private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final String csvFile;
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@ -44,56 +31,22 @@ public class ReadCSV implements Closeable {
final String hdfsPath = parser.get("hdfsPath"); final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode"); final String hdfsNameNode = parser.get("hdfsNameNode");
final String classForName = parser.get("classForName"); final String classForName = parser.get("classForName");
Optional<String> delimiter = Optional.ofNullable(parser.get("delimiter"));
char del = ';';
if (delimiter.isPresent())
del = delimiter.get().charAt(0);
try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL)) { Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
log.info("Getting CSV file..."); FileSystem fileSystem = FileSystem.get(conf);
readCSV.execute(classForName); BufferedReader reader = new BufferedReader(
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
} GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);
}
public void execute(final String classForName) throws Exception { reader.close();
CSVParser csvParser = new CSVParser();
csvParser
.parse(csvFile, classForName)
.stream()
.forEach(p -> write(p));
} }
@Override
public void close() throws IOException {
writer.close();
}
public ReadCSV(
final String hdfsPath,
final String hdfsNameNode,
final String fileURL)
throws Exception {
this.conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector2 httpConnector = new HttpConnector2();
FileSystem fileSystem = FileSystem.get(this.conf);
Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false);
}
fsDataOutputStream = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
this.csvFile = httpConnector.getInputSource(fileURL);
}
protected void write(final Object p) {
try {
writer.write(OBJECT_MAPPER.writeValueAsString(p));
writer.newLine();
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
} }

View File

@ -11,18 +11,20 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
/** /**
* Applies the parsing of an excel file and writes the Serialization of it in hdfs * Applies the parsing of an excel file and writes the Serialization of it in hdfs
*/ */
public class ReadExcel implements Closeable { public class ReadExcel implements Closeable {
private static final Log log = LogFactory.getLog(ReadCSV.class); private static final Log log = LogFactory.getLog(ReadExcel.class);
private final Configuration conf;
private final BufferedWriter writer; private final BufferedWriter writer;
private final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private final InputStream excelFile; private final InputStream excelFile;
@ -31,7 +33,7 @@ public class ReadExcel implements Closeable {
final ArgumentApplicationParser parser = new ArgumentApplicationParser( final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils IOUtils
.toString( .toString(
ReadCSV.class ReadExcel.class
.getResourceAsStream( .getResourceAsStream(
"/eu/dnetlib/dhp/actionmanager/project/parameters.json"))); "/eu/dnetlib/dhp/actionmanager/project/parameters.json")));
@ -51,13 +53,15 @@ public class ReadExcel implements Closeable {
} }
} }
public void execute(final String classForName, final String sheetName) throws Exception { public void execute(final String classForName, final String sheetName)
throws IOException, ClassNotFoundException, InvalidFormatException, IllegalAccessException,
InstantiationException {
EXCELParser excelParser = new EXCELParser(); EXCELParser excelParser = new EXCELParser();
excelParser excelParser
.parse(excelFile, classForName, sheetName) .parse(excelFile, classForName, sheetName)
.stream() .stream()
.forEach(p -> write(p)); .forEach(this::write);
} }
@Override @Override
@ -68,20 +72,20 @@ public class ReadExcel implements Closeable {
public ReadExcel( public ReadExcel(
final String hdfsPath, final String hdfsPath,
final String hdfsNameNode, final String hdfsNameNode,
final String fileURL) final String fileURL) throws CollectorException, IOException {
throws Exception {
this.conf = new Configuration(); final Configuration conf = new Configuration();
this.conf.set("fs.defaultFS", hdfsNameNode); conf.set("fs.defaultFS", hdfsNameNode);
HttpConnector2 httpConnector = new HttpConnector2(); HttpConnector2 httpConnector = new HttpConnector2();
FileSystem fileSystem = FileSystem.get(this.conf); FileSystem fileSystem = FileSystem.get(conf);
Path hdfsWritePath = new Path(hdfsPath); Path hdfsWritePath = new Path(hdfsPath);
FSDataOutputStream fsDataOutputStream = null;
if (fileSystem.exists(hdfsWritePath)) { if (fileSystem.exists(hdfsWritePath)) {
fileSystem.delete(hdfsWritePath, false); fileSystem.delete(hdfsWritePath, false);
} }
fsDataOutputStream = fileSystem.create(hdfsWritePath); FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
this.writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8)); this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
this.excelFile = httpConnector.getInputSourceAsStream(fileURL); this.excelFile = httpConnector.getInputSourceAsStream(fileURL);
} }

View File

@ -1,20 +1,32 @@
package eu.dnetlib.dhp.actionmanager.project.utils; package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable; import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
import com.opencsv.bean.CsvIgnore;
/** /**
* The model for the programme csv file * The model for the programme csv file
*/ */
public class CSVProgramme implements Serializable { public class CSVProgramme implements Serializable {
private String rcn; @CsvBindByName(column = "code")
private String code; private String code;
@CsvBindByName(column = "title")
private String title; private String title;
@CsvBindByName(column = "shortTitle")
private String shortTitle; private String shortTitle;
@CsvBindByName(column = "language")
private String language; private String language;
@CsvIgnore
private String classification; private String classification;
@CsvIgnore
private String classification_short; private String classification_short;
public String getClassification_short() { public String getClassification_short() {
@ -33,14 +45,6 @@ public class CSVProgramme implements Serializable {
this.classification = classification; this.classification = classification;
} }
public String getRcn() {
return rcn;
}
public void setRcn(String rcn) {
this.rcn = rcn;
}
public String getCode() { public String getCode() {
return code; return code;
} }
@ -73,5 +77,4 @@ public class CSVProgramme implements Serializable {
this.language = language; this.language = language;
} }
//
} }

View File

@ -0,0 +1,46 @@
package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * The model for the projects csv file.
 * <p>
 * Plain serializable bean: each field is bound by name to the CSV column
 * declared in its {@code @CsvBindByName} annotation.
 */
public class CSVProject implements Serializable {

	/** Value read from the {@code id} column. */
	@CsvBindByName(column = "id")
	private String id;

	/** Value read from the {@code programme} column. */
	@CsvBindByName(column = "programme")
	private String programme;

	/** Value read from the {@code topics} column. */
	@CsvBindByName(column = "topics")
	private String topics;

	/**
	 * @return the value of the {@code id} column
	 */
	public String getId() {
		return this.id;
	}

	/**
	 * @param id the value to store for the {@code id} column
	 */
	public void setId(String id) {
		this.id = id;
	}

	/**
	 * @return the value of the {@code programme} column
	 */
	public String getProgramme() {
		return this.programme;
	}

	/**
	 * @param programme the value to store for the {@code programme} column
	 */
	public void setProgramme(String programme) {
		this.programme = programme;
	}

	/**
	 * @return the value of the {@code topics} column
	 */
	public String getTopics() {
		return this.topics;
	}

	/**
	 * @param topics the value to store for the {@code topics} column
	 */
	public void setTopics(String topics) {
		this.topics = topics;
	}

}

View File

@ -1,5 +1,5 @@
package eu.dnetlib.dhp.actionmanager.project.utils; package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable; import java.io.Serializable;

View File

@ -9,6 +9,7 @@ import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.listKeyValues;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.qualifier;
import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty; import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.structuredProperty;
import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -74,7 +75,7 @@ public class GenerateRorActionSetJob {
final String jsonConfiguration = IOUtils final String jsonConfiguration = IOUtils
.toString( .toString(
SparkAtomicActionJob.class GenerateRorActionSetJob.class
.getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json")); .getResourceAsStream("/eu/dnetlib/dhp/actionmanager/ror/action_set_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
@ -108,7 +109,7 @@ public class GenerateRorActionSetJob {
private static void processRorOrganizations(final SparkSession spark, private static void processRorOrganizations(final SparkSession spark,
final String inputPath, final String inputPath,
final String outputPath) throws Exception { final String outputPath) throws IOException {
readInputPath(spark, inputPath) readInputPath(spark, inputPath)
.map( .map(
@ -203,7 +204,7 @@ public class GenerateRorActionSetJob {
private static Dataset<RorOrganization> readInputPath( private static Dataset<RorOrganization> readInputPath(
final SparkSession spark, final SparkSession spark,
final String path) throws Exception { final String path) throws IOException {
try (final FileSystem fileSystem = FileSystem.get(new Configuration()); try (final FileSystem fileSystem = FileSystem.get(new Configuration());
final InputStream is = fileSystem.open(new Path(path))) { final InputStream is = fileSystem.open(new Path(path))) {

View File

@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Address implements Serializable { public class Address implements Serializable {
private static final long serialVersionUID = 2444635485253443195L;
@JsonProperty("lat") @JsonProperty("lat")
private Float lat; private Float lat;
@ -37,8 +39,6 @@ public class Address implements Serializable {
@JsonProperty("line") @JsonProperty("line")
private String line; private String line;
private final static long serialVersionUID = 2444635485253443195L;
public Float getLat() { public Float getLat() {
return lat; return lat;
} }

View File

@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Country implements Serializable { public class Country implements Serializable {
private static final long serialVersionUID = 4357848706229493627L;
@JsonProperty("country_code") @JsonProperty("country_code")
private String countryCode; private String countryCode;
@JsonProperty("country_name") @JsonProperty("country_name")
private String countryName; private String countryName;
private final static long serialVersionUID = 4357848706229493627L;
public String getCountryCode() { public String getCountryCode() {
return countryCode; return countryCode;
} }

View File

@ -13,7 +13,7 @@ public class ExternalIdType implements Serializable {
private String preferred; private String preferred;
private final static long serialVersionUID = 2616688352998387611L; private static final long serialVersionUID = 2616688352998387611L;
public ExternalIdType() { public ExternalIdType() {
} }

View File

@ -15,8 +15,7 @@ import com.fasterxml.jackson.databind.JsonNode;
public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> { public class ExternalIdTypeDeserializer extends JsonDeserializer<ExternalIdType> {
@Override @Override
public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) public ExternalIdType deserialize(final JsonParser p, final DeserializationContext ctxt) throws IOException {
throws IOException, JsonProcessingException {
final ObjectCodec oc = p.getCodec(); final ObjectCodec oc = p.getCodec();
final JsonNode node = oc.readTree(p); final JsonNode node = oc.readTree(p);

View File

@ -19,7 +19,7 @@ public class GeonamesAdmin implements Serializable {
@JsonProperty("code") @JsonProperty("code")
private String code; private String code;
private final static long serialVersionUID = 7294958526269195673L; private static final long serialVersionUID = 7294958526269195673L;
public String getAsciiName() { public String getAsciiName() {
return asciiName; return asciiName;

View File

@ -31,7 +31,7 @@ public class GeonamesCity implements Serializable {
@JsonProperty("license") @JsonProperty("license")
private License license; private License license;
private final static long serialVersionUID = -8389480201526252955L; private static final long serialVersionUID = -8389480201526252955L;
public NameAndCode getNutsLevel2() { public NameAndCode getNutsLevel2() {
return nutsLevel2; return nutsLevel2;

View File

@ -13,7 +13,7 @@ public class Label implements Serializable {
@JsonProperty("label") @JsonProperty("label")
private String label; private String label;
private final static long serialVersionUID = -6576156103297850809L; private static final long serialVersionUID = -6576156103297850809L;
public String getIso639() { public String getIso639() {
return iso639; return iso639;

View File

@ -13,7 +13,7 @@ public class License implements Serializable {
@JsonProperty("license") @JsonProperty("license")
private String license; private String license;
private final static long serialVersionUID = -194308261058176439L; private static final long serialVersionUID = -194308261058176439L;
public String getAttribution() { public String getAttribution() {
return attribution; return attribution;

View File

@ -7,14 +7,14 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class NameAndCode implements Serializable { public class NameAndCode implements Serializable {
private static final long serialVersionUID = 5459836979206140843L;
@JsonProperty("name") @JsonProperty("name")
private String name; private String name;
@JsonProperty("code") @JsonProperty("code")
private String code; private String code;
private final static long serialVersionUID = 5459836979206140843L;
public String getName() { public String getName() {
return name; return name;
} }

View File

@ -7,6 +7,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class Relationship implements Serializable { public class Relationship implements Serializable {
private static final long serialVersionUID = 7847399503395576960L;
@JsonProperty("type") @JsonProperty("type")
private String type; private String type;
@ -16,8 +18,6 @@ public class Relationship implements Serializable {
@JsonProperty("label") @JsonProperty("label")
private String label; private String label;
private final static long serialVersionUID = 7847399503395576960L;
public String getType() { public String getType() {
return type; return type;
} }

View File

@ -11,6 +11,8 @@ import com.fasterxml.jackson.annotation.JsonProperty;
public class RorOrganization implements Serializable { public class RorOrganization implements Serializable {
private static final long serialVersionUID = -2658312087616043225L;
@JsonProperty("ip_addresses") @JsonProperty("ip_addresses")
private List<String> ipAddresses = new ArrayList<>(); private List<String> ipAddresses = new ArrayList<>();
@ -59,8 +61,6 @@ public class RorOrganization implements Serializable {
@JsonProperty("status") @JsonProperty("status")
private String status; private String status;
private final static long serialVersionUID = -2658312087616043225L;
public List<String> getIpAddresses() { public List<String> getIpAddresses() {
return ipAddresses; return ipAddresses;
} }

View File

@ -0,0 +1,69 @@
package eu.dnetlib.dhp.actionmanager.scholix
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.oaf.{Oaf, Relation, Result}
import org.apache.spark.SparkConf
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
import scala.io.Source
/**
 * Spark job that prepares the content of the scholix action set.
 *
 * It reads the relations under `sourcePath/relation`, keeps the ones accepted by
 * [[isLiveNonMergeRelation]], and writes into `workingDirFolder`:
 *  - `id_relation`: the distinct source/target identifiers of the kept relations;
 *  - `actionSetOaf`: the kept relations, followed (appended) by every entity found under
 *    `sourcePath/entities/` whose id appears in `id_relation`.
 */
object SparkCreateActionset {

  /**
   * Selection rule shared by every step that consumes relations.
   *
   * A relation is kept when it carries no DataInfo, or its DataInfo flags it as not
   * deleted by inference, and in addition its relClass does not contain "merge"
   * (case-insensitive).
   *
   * @param r the relation to evaluate
   * @return true when the relation must be part of the action set
   */
  private def isLiveNonMergeRelation(r: Relation): Boolean =
    (r.getDataInfo == null || r.getDataInfo.getDeletedbyinference == false) && !r.getRelClass.toLowerCase.contains("merge")

  /**
   * Job entry point.
   *
   * @param args command line arguments, parsed according to
   *             `/eu/dnetlib/dhp/sx/actionset/generate_actionset.json`; the job reads the
   *             `master`, `sourcePath`, `targetPath` and `workingDirFolder` parameters
   */
  def main(args: Array[String]): Unit = {
    val log: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/generate_actionset.json")).mkString)
    parser.parseArgument(args)

    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

    val sourcePath = parser.get("sourcePath")
    log.info(s"sourcePath -> $sourcePath")

    // targetPath is only logged here: this job writes all of its output under workingDirFolder.
    val targetPath = parser.get("targetPath")
    log.info(s"targetPath -> $targetPath")

    val workingDirFolder = parser.get("workingDirFolder")
    log.info(s"workingDirFolder -> $workingDirFolder")

    implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val resultEncoders: Encoder[Result] = Encoders.kryo[Result]
    implicit val relationEncoders: Encoder[Relation] = Encoders.kryo[Relation]

    import spark.implicits._

    // Single definition of the filtered relations, reused by both steps below so that
    // the identifier list and the saved relations are always selected with the same rule.
    val relation = spark.read.load(s"$sourcePath/relation").as[Relation]
    val validRelations = relation.filter(r => isLiveNonMergeRelation(r))

    log.info("extract source and target Identifier involved in relations")
    validRelations
      .flatMap(r => List(r.getSource, r.getTarget))
      .distinct()
      .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/id_relation")

    // The identifiers are read back from the path written above rather than reusing
    // the in-memory dataset.
    val idRelation = spark.read.load(s"$workingDirFolder/id_relation").as[String]

    log.info("save relation filtered")
    validRelations
      .write.mode(SaveMode.Overwrite).save(s"$workingDirFolder/actionSetOaf")

    log.info("saving entities")
    val entities: Dataset[(String, Result)] = spark.read.load(s"$sourcePath/entities/*").as[Result].map(p => (p.getId, p))(Encoders.tuple(Encoders.STRING, resultEncoders))

    // Keep only the entities referenced by at least one kept relation and append them
    // to the relations already stored in actionSetOaf.
    entities
      .joinWith(idRelation, entities("_1").equalTo(idRelation("value")))
      .map(p => p._1._2)
      .write.mode(SaveMode.Append).save(s"$workingDirFolder/actionSetOaf")
  }

}

View File

@ -0,0 +1,86 @@
package eu.dnetlib.dhp.actionmanager.scholix
import com.fasterxml.jackson.databind.ObjectMapper
import eu.dnetlib.dhp.application.ArgumentApplicationParser
import eu.dnetlib.dhp.schema.action.AtomicAction
import eu.dnetlib.dhp.schema.oaf.{Oaf, Dataset => OafDataset,Publication, Software, OtherResearchProduct, Relation}
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapred.SequenceFileOutputFormat
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import scala.io.Source
/**
 * Spark job that serializes Oaf objects as AtomicAction JSON payloads and stores them
 * as a gzip-compressed Hadoop sequence file of (Text, Text) pairs.
 */
object SparkSaveActionSet {

  /**
   * Single JSON serializer shared by all records processed in this JVM, instead of
   * allocating a new ObjectMapper for every record.
   */
  private val mapper: ObjectMapper = new ObjectMapper()

  /**
   * Wraps a payload into an AtomicAction and serializes the action to JSON.
   *
   * @param payload the object to wrap
   * @param clazz   the class recorded in the action as payload type
   * @return the pair (canonical name of the payload runtime class, JSON of the action)
   */
  private def serializeAction[T <: Oaf](payload: T, clazz: Class[T]): (String, String) = {
    val action: AtomicAction[T] = new AtomicAction[T]
    action.setClazz(clazz)
    action.setPayload(payload)
    (payload.getClass.getCanonicalName, mapper.writeValueAsString(action))
  }

  /**
   * Converts an Oaf object into the key/value pair stored in the action set.
   *
   * Supported types are Dataset, Publication, Software, OtherResearchProduct and Relation.
   *
   * @param item the object to convert
   * @return the pair (canonical class name, AtomicAction JSON), or null when the type of
   *         item is not one of the supported ones; callers are expected to discard nulls
   */
  def toActionSet(item: Oaf): (String, String) = {
    item match {
      case dataset: OafDataset =>
        serializeAction(dataset, classOf[OafDataset])
      case publication: Publication =>
        serializeAction(publication, classOf[Publication])
      case software: Software =>
        serializeAction(software, classOf[Software])
      case orp: OtherResearchProduct =>
        serializeAction(orp, classOf[OtherResearchProduct])
      case relation: Relation =>
        serializeAction(relation, classOf[Relation])
      case _ =>
        null
    }
  }

  /**
   * Job entry point.
   *
   * @param args command line arguments, parsed according to
   *             `/eu/dnetlib/dhp/sx/actionset/save_actionset.json`; the job reads the
   *             `master`, `sourcePath` and `targetPath` parameters
   */
  def main(args: Array[String]): Unit = {
    val log: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
    val parser = new ArgumentApplicationParser(Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/actionset/save_actionset.json")).mkString)
    parser.parseArgument(args)

    val spark: SparkSession =
      SparkSession
        .builder()
        .config(conf)
        .appName(getClass.getSimpleName)
        .master(parser.get("master")).getOrCreate()

    val sourcePath = parser.get("sourcePath")
    log.info(s"sourcePath -> $sourcePath")

    val targetPath = parser.get("targetPath")
    log.info(s"targetPath -> $targetPath")

    implicit val oafEncoders: Encoder[Oaf] = Encoders.kryo[Oaf]
    implicit val tEncoder: Encoder[(String, String)] = Encoders.tuple(Encoders.STRING, Encoders.STRING)

    spark.read.load(sourcePath).as[Oaf]
      .map(o => toActionSet(o))
      // toActionSet yields null for unsupported types: drop them before writing.
      .filter(o => o != null)
      .rdd.map(s => (new Text(s._1), new Text(s._2))).saveAsHadoopFile(s"$targetPath", classOf[Text], classOf[Text], classOf[SequenceFileOutputFormat[Text, Text]], classOf[GzipCodec])
  }

}

View File

@ -6,6 +6,8 @@ import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
public abstract class ReportingJob { public abstract class ReportingJob {
/** /**
@ -22,7 +24,7 @@ public abstract class ReportingJob {
protected final AggregatorReport report; protected final AggregatorReport report;
public ReportingJob(AggregatorReport report) { protected ReportingJob(AggregatorReport report) {
this.report = report; this.report = report;
} }

View File

@ -25,7 +25,7 @@ public class MDStoreActionNode {
NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK NEW_VERSION, ROLLBACK, COMMIT, READ_LOCK, READ_UNLOCK
} }
public static String NEW_VERSION_URI = "%s/mdstore/%s/newVersion"; public static final String NEW_VERSION_URI = "%s/mdstore/%s/newVersion";
public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s"; public static final String COMMIT_VERSION_URL = "%s/version/%s/commit/%s";
public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort"; public static final String ROLLBACK_VERSION_URL = "%s/version/%s/abort";
@ -70,7 +70,7 @@ public class MDStoreActionNode {
if (StringUtils.isBlank(hdfsuri)) { if (StringUtils.isBlank(hdfsuri)) {
throw new IllegalArgumentException("missing or empty argument namenode"); throw new IllegalArgumentException("missing or empty argument namenode");
} }
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {
@ -94,7 +94,7 @@ public class MDStoreActionNode {
break; break;
} }
case ROLLBACK: { case ROLLBACK: {
final String mdStoreVersion_params = argumentParser.get("mdStoreVersion"); final String mdStoreVersion_params = argumentParser.get(MDSTOREVERSIONPARAM);
final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class); final MDStoreVersion mdStoreVersion = MAPPER.readValue(mdStoreVersion_params, MDStoreVersion.class);
if (StringUtils.isBlank(mdStoreVersion.getId())) { if (StringUtils.isBlank(mdStoreVersion.getId())) {

View File

@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.aggregation.common.ReporterCallback; import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
import eu.dnetlib.dhp.aggregation.common.ReportingJob; import eu.dnetlib.dhp.aggregation.common.ReportingJob;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
public class CollectorWorker extends ReportingJob { public class CollectorWorker extends ReportingJob {
@ -116,7 +118,7 @@ public class CollectorWorker extends ReportingJob {
final CollectorPlugin.NAME.OTHER_NAME plugin = Optional final CollectorPlugin.NAME.OTHER_NAME plugin = Optional
.ofNullable(api.getParams().get("other_plugin_type")) .ofNullable(api.getParams().get("other_plugin_type"))
.map(CollectorPlugin.NAME.OTHER_NAME::valueOf) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf)
.get(); .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type"));
switch (plugin) { switch (plugin) {
case mdstore_mongodb_dump: case mdstore_mongodb_dump:

View File

@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.message.MessageSender; import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion; import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;

View File

@ -207,6 +207,7 @@ public class GenerateNativeStoreSparkJob {
totalItems.add(1); totalItems.add(1);
try { try {
SAXReader reader = new SAXReader(); SAXReader reader = new SAXReader();
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8))); Document document = reader.read(new ByteArrayInputStream(input.getBytes(StandardCharsets.UTF_8)));
Node node = document.selectSingleNode(xpath); Node node = document.selectSingleNode(xpath);
final String originalIdentifier = node.getText(); final String originalIdentifier = node.getText();

View File

@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
import java.util.stream.Stream; import java.util.stream.Stream;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
public interface CollectorPlugin { public interface CollectorPlugin {

View File

@ -11,15 +11,13 @@ import org.bson.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoCollection;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.MdstoreClient; import eu.dnetlib.dhp.common.MdstoreClient;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
public class MDStoreCollectorPlugin implements CollectorPlugin { public class MDStoreCollectorPlugin implements CollectorPlugin {

View File

@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
public class MongoDbDumpCollectorPlugin implements CollectorPlugin { public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
@ -23,7 +23,7 @@ public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
public static final String PATH_PARAM = "path"; public static final String PATH_PARAM = "path";
public static final String BODY_JSONPATH = "$.body"; public static final String BODY_JSONPATH = "$.body";
public FileSystem fileSystem; private final FileSystem fileSystem;
public MongoDbDumpCollectorPlugin(FileSystem fileSystem) { public MongoDbDumpCollectorPlugin(FileSystem fileSystem) {
this.fileSystem = fileSystem; this.fileSystem = fileSystem;

View File

@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
public class OaiCollectorPlugin implements CollectorPlugin { public class OaiCollectorPlugin implements CollectorPlugin {
@ -66,11 +66,11 @@ public class OaiCollectorPlugin implements CollectorPlugin {
} }
if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) { if (fromDate != null && !fromDate.matches(DATE_REGEX) && !fromDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + fromDate);
} }
if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) { if (untilDate != null && !untilDate.matches(DATE_REGEX) && !untilDate.matches(UTC_DATETIME_REGEX)) {
throw new CollectorException("Invalid date (YYYY-MM-DD): " + untilDate); throw new CollectorException("Invalid date (YYYY-MM-DD or YYYY-MM-DDT00:00:00Z): " + untilDate);
} }
final Iterator<Iterator<String>> iters = sets final Iterator<Iterator<String>> iters = sets

View File

@ -2,7 +2,6 @@
package eu.dnetlib.dhp.collection.plugin.oai; package eu.dnetlib.dhp.collection.plugin.oai;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.URLEncoder; import java.net.URLEncoder;
@ -16,21 +15,21 @@ import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper; import org.dom4j.DocumentHelper;
import org.dom4j.Node; import org.dom4j.Node;
import org.dom4j.io.OutputFormat; import org.dom4j.io.OutputFormat;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter; import org.dom4j.io.XMLWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpConnector2;
import eu.dnetlib.dhp.collection.XmlCleaner; import eu.dnetlib.dhp.collection.XmlCleaner;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OaiIterator implements Iterator<String> { public class OaiIterator implements Iterator<String> {
private static final Logger log = LoggerFactory.getLogger(OaiIterator.class); private static final Logger log = LoggerFactory.getLogger(OaiIterator.class);
private final static String REPORT_PREFIX = "oai:"; private static final String REPORT_PREFIX = "oai:";
public static final String UTF_8 = "UTF-8";
private final Queue<String> queue = new PriorityBlockingQueue<>(); private final Queue<String> queue = new PriorityBlockingQueue<>();
@ -68,7 +67,7 @@ public class OaiIterator implements Iterator<String> {
try { try {
this.token = firstPage(); this.token = firstPage();
} catch (final CollectorException e) { } catch (final CollectorException e) {
throw new RuntimeException(e); throw new IllegalStateException(e);
} }
} }
} }
@ -90,7 +89,7 @@ public class OaiIterator implements Iterator<String> {
try { try {
token = otherPages(token); token = otherPages(token);
} catch (final CollectorException e) { } catch (final CollectorException e) {
throw new RuntimeException(e); throw new IllegalStateException(e);
} }
} }
return res; return res;
@ -99,23 +98,24 @@ public class OaiIterator implements Iterator<String> {
@Override @Override
public void remove() { public void remove() {
throw new UnsupportedOperationException();
} }
private String firstPage() throws CollectorException { private String firstPage() throws CollectorException {
try { try {
String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, "UTF-8"); String url = baseUrl + "?verb=ListRecords&metadataPrefix=" + URLEncoder.encode(mdFormat, UTF_8);
if (set != null && !set.isEmpty()) { if (set != null && !set.isEmpty()) {
url += "&set=" + URLEncoder.encode(set, "UTF-8"); url += "&set=" + URLEncoder.encode(set, UTF_8);
} }
if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX) if (fromDate != null && (fromDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { || fromDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8"); url += "&from=" + URLEncoder.encode(fromDate, UTF_8);
} }
if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX) if (untilDate != null && (untilDate.matches(OaiCollectorPlugin.DATE_REGEX)
|| untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) { || untilDate.matches(OaiCollectorPlugin.UTC_DATETIME_REGEX))) {
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8"); url += "&until=" + URLEncoder.encode(untilDate, UTF_8);
} }
log.info("Start harvesting using url: " + url); log.info("Start harvesting using url: {}", url);
return downloadPage(url); return downloadPage(url);
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
@ -143,7 +143,7 @@ public class OaiIterator implements Iterator<String> {
return downloadPage( return downloadPage(
baseUrl baseUrl
+ "?verb=ListRecords&resumptionToken=" + "?verb=ListRecords&resumptionToken="
+ URLEncoder.encode(resumptionToken, "UTF-8")); + URLEncoder.encode(resumptionToken, UTF_8));
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
report.put(e.getClass().getName(), e.getMessage()); report.put(e.getClass().getName(), e.getMessage());
throw new CollectorException(e); throw new CollectorException(e);
@ -161,7 +161,7 @@ public class OaiIterator implements Iterator<String> {
report.put(e.getClass().getName(), e.getMessage()); report.put(e.getClass().getName(), e.getMessage());
final String cleaned = XmlCleaner.cleanAllEntities(xml); final String cleaned = XmlCleaner.cleanAllEntities(xml);
try { try {
doc = DocumentHelper.parseText(xml); doc = DocumentHelper.parseText(cleaned);
} catch (final DocumentException e1) { } catch (final DocumentException e1) {
final String resumptionToken = extractResumptionToken(xml); final String resumptionToken = extractResumptionToken(xml);
if (resumptionToken == null) { if (resumptionToken == null) {

View File

@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai;
import java.util.Iterator; import java.util.Iterator;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport; import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.collection.HttpClientParams; import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.HttpConnector2; import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OaiIteratorFactory { public class OaiIteratorFactory {

View File

@ -9,11 +9,11 @@ import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor; import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
/** /**
* TODO: delegate HTTP requests to the common HttpConnector2 implementation. * TODO: delegate HTTP requests to the common HttpConnector2 implementation.

View File

@ -30,9 +30,9 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.JsonUtils; import eu.dnetlib.dhp.collection.JsonUtils;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.HttpClientParams;
/** /**
* log.info(...) equal to log.trace(...) in the application-logs * log.info(...) equal to log.trace(...) in the application-logs
@ -131,7 +131,8 @@ public class RestIterator implements Iterator<String> {
private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath) private void initXmlTransformation(String resultTotalXpath, String resumptionXpath, String entityXpath)
throws TransformerConfigurationException, XPathExpressionException { throws TransformerConfigurationException, XPathExpressionException {
transformer = TransformerFactory.newInstance().newTransformer(); final TransformerFactory factory = TransformerFactory.newInstance();
transformer = factory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
xpath = XPathFactory.newInstance().newXPath(); xpath = XPathFactory.newInstance().newXPath();
@ -142,7 +143,7 @@ public class RestIterator implements Iterator<String> {
private void initQueue() { private void initQueue() {
query = baseUrl + "?" + queryParams + querySize + queryFormat; query = baseUrl + "?" + queryParams + querySize + queryFormat;
log.info("REST calls starting with " + query); log.info("REST calls starting with {}", query);
} }
private void disconnect() { private void disconnect() {
@ -174,7 +175,7 @@ public class RestIterator implements Iterator<String> {
try { try {
query = downloadPage(query); query = downloadPage(query);
} catch (CollectorException e) { } catch (CollectorException e) {
log.debug("CollectorPlugin.next()-Exception: " + e); log.debug("CollectorPlugin.next()-Exception: {}", e);
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
@ -198,7 +199,7 @@ public class RestIterator implements Iterator<String> {
// check if cursor=* is initial set otherwise add it to the queryParam URL // check if cursor=* is initial set otherwise add it to the queryParam URL
if (resumptionType.equalsIgnoreCase("deep-cursor")) { if (resumptionType.equalsIgnoreCase("deep-cursor")) {
log.debug("check resumptionType deep-cursor and check cursor=*?" + query); log.debug("check resumptionType deep-cursor and check cursor=*?{}", query);
if (!query.contains("&cursor=")) { if (!query.contains("&cursor=")) {
query += "&cursor=*"; query += "&cursor=*";
} }
@ -208,16 +209,16 @@ public class RestIterator implements Iterator<String> {
log.info("requestig URL [{}]", query); log.info("requestig URL [{}]", query);
URL qUrl = new URL(query); URL qUrl = new URL(query);
log.debug("authMethod :" + authMethod); log.debug("authMethod: {}", authMethod);
if ("bearer".equalsIgnoreCase(this.authMethod)) { if ("bearer".equalsIgnoreCase(this.authMethod)) {
log.trace("authMethod before inputStream: " + resultXml); log.trace("authMethod before inputStream: {}", resultXml);
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Bearer " + authToken);
conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType()); conn.setRequestProperty(HttpHeaders.CONTENT_TYPE, ContentType.APPLICATION_JSON.getMimeType());
conn.setRequestMethod("GET"); conn.setRequestMethod("GET");
theHttpInputStream = conn.getInputStream(); theHttpInputStream = conn.getInputStream();
} else if (BASIC.equalsIgnoreCase(this.authMethod)) { } else if (BASIC.equalsIgnoreCase(this.authMethod)) {
log.trace("authMethod before inputStream: " + resultXml); log.trace("authMethod before inputStream: {}", resultXml);
HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection(); HttpURLConnection conn = (HttpURLConnection) qUrl.openConnection();
conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken); conn.setRequestProperty(HttpHeaders.AUTHORIZATION, "Basic " + authToken);
conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType()); conn.setRequestProperty(HttpHeaders.ACCEPT, ContentType.APPLICATION_XML.getMimeType());
@ -237,13 +238,13 @@ public class RestIterator implements Iterator<String> {
if (!(emptyXml).equalsIgnoreCase(resultXml)) { if (!(emptyXml).equalsIgnoreCase(resultXml)) {
resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE); resultNode = (Node) xpath.evaluate("/", new InputSource(resultStream), XPathConstants.NODE);
nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET); nodeList = (NodeList) xprEntity.evaluate(resultNode, XPathConstants.NODESET);
log.debug("nodeList.length: " + nodeList.getLength()); log.debug("nodeList.length: {}", nodeList.getLength());
for (int i = 0; i < nodeList.getLength(); i++) { for (int i = 0; i < nodeList.getLength(); i++) {
StringWriter sw = new StringWriter(); StringWriter sw = new StringWriter();
transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw)); transformer.transform(new DOMSource(nodeList.item(i)), new StreamResult(sw));
String toEnqueue = sw.toString(); String toEnqueue = sw.toString();
if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) { if (toEnqueue == null || StringUtils.isBlank(toEnqueue) || emptyXml.equalsIgnoreCase(toEnqueue)) {
log.warn("The following record resulted in empty item for the feeding queue: " + resultXml); log.warn("The following record resulted in empty item for the feeding queue: {}", resultXml);
} else { } else {
recordQueue.add(sw.toString()); recordQueue.add(sw.toString());
} }
@ -274,9 +275,9 @@ public class RestIterator implements Iterator<String> {
String[] resumptionKeyValue = arrayUrlArgStr.split("="); String[] resumptionKeyValue = arrayUrlArgStr.split("=");
if (isInteger(resumptionKeyValue[1])) { if (isInteger(resumptionKeyValue[1])) {
urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]); urlOldResumptionSize = Integer.parseInt(resumptionKeyValue[1]);
log.debug("discover OldResumptionSize from Url (int): " + urlOldResumptionSize); log.debug("discover OldResumptionSize from Url (int): {}", urlOldResumptionSize);
} else { } else {
log.debug("discover OldResumptionSize from Url (str): " + resumptionKeyValue[1]); log.debug("discover OldResumptionSize from Url (str): {}", resumptionKeyValue[1]);
} }
} }
} }

View File

@ -1,14 +1,12 @@
package eu.dnetlib.dhp.sx.graph.bio package eu.dnetlib.dhp.sx.bio
import eu.dnetlib.dhp.schema.common.ModelConstants import eu.dnetlib.dhp.schema.common.ModelConstants
import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils} import eu.dnetlib.dhp.schema.oaf.utils.{GraphCleaningFunctions, OafMapperUtils}
import eu.dnetlib.dhp.schema.oaf.{Author, DataInfo, Dataset, Instance, KeyValue, Oaf, Relation, StructuredProperty} import eu.dnetlib.dhp.schema.oaf._
import org.json4s.DefaultFormats import org.json4s.DefaultFormats
import org.json4s.JsonAST.{JField, JObject, JString} import org.json4s.JsonAST.{JField, JObject, JString}
import org.json4s.jackson.JsonMethods.{compact, parse, render} import org.json4s.jackson.JsonMethods.{compact, parse, render}
import collection.JavaConverters._
import scala.collection.JavaConverters._
object BioDBToOAF { object BioDBToOAF {
case class EBILinkItem(id: Long, links: String) {} case class EBILinkItem(id: Long, links: String) {}
@ -199,7 +197,7 @@ object BioDBToOAF {
d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO))
} }
val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version")) val relevant_dates: List[StructuredProperty] = dates.filter(d => !d.date_info.contains("entry version"))
.map(date => OafMapperUtils.structuredProperty(date.date, "UNKNOWN", "UNKNOWN", ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO)) .map(date => OafMapperUtils.structuredProperty(date.date, ModelConstants.UNKNOWN, ModelConstants.UNKNOWN, ModelConstants.DNET_DATACITE_DATE, ModelConstants.DNET_DATACITE_DATE, DATA_INFO))
if (relevant_dates != null && relevant_dates.nonEmpty) if (relevant_dates != null && relevant_dates.nonEmpty)
d.setRelevantdate(relevant_dates.asJava) d.setRelevantdate(relevant_dates.asJava)
d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(i_date.get.date, DATA_INFO))
@ -218,12 +216,12 @@ object BioDBToOAF {
if (references_pmid != null && references_pmid.nonEmpty) { if (references_pmid != null && references_pmid.nonEmpty) {
val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) val rel = createRelation(references_pmid.head, "pmid", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null)
rel.getCollectedfrom rel.getCollectedfrom
List(d, rel) List(d, rel)
} }
else if (references_doi != null && references_doi.nonEmpty) { else if (references_doi != null && references_doi.nonEmpty) {
val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), "relationship", "isRelatedTo", if (i_date.isDefined) i_date.get.date else null) val rel = createRelation(references_doi.head, "doi", d.getId, collectedFromMap("uniprot"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, if (i_date.isDefined) i_date.get.date else null)
List(d, rel) List(d, rel)
} }
else else
@ -231,7 +229,6 @@ object BioDBToOAF {
} }
def generate_unresolved_id(pid: String, pidType: String): String = { def generate_unresolved_id(pid: String, pidType: String): String = {
s"unresolved::$pid::$pidType" s"unresolved::$pid::$pidType"
} }
@ -243,7 +240,7 @@ object BioDBToOAF {
rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava) rel.setCollectedfrom(List(collectedFromMap("pdb")).asJava)
rel.setDataInfo(DATA_INFO) rel.setDataInfo(DATA_INFO)
rel.setRelType("resultResult") rel.setRelType(ModelConstants.RESULT_RESULT)
rel.setSubRelType(subRelType) rel.setSubRelType(subRelType)
rel.setRelClass(relClass) rel.setRelClass(relClass)
@ -263,7 +260,7 @@ object BioDBToOAF {
def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date: String): Relation = { def createSupplementaryRelation(pid: String, pidType: String, sourceId: String, collectedFrom: KeyValue, date: String): Relation = {
createRelation(pid,pidType,sourceId,collectedFrom, "supplement","IsSupplementTo", date) createRelation(pid, pidType, sourceId, collectedFrom, ModelConstants.SUPPLEMENT, ModelConstants.IS_SUPPLEMENT_TO, date)
} }
@ -392,6 +389,6 @@ object BioDBToOAF {
i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO))
d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO)) d.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(input.date), DATA_INFO))
List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"),"relationship", "isRelatedTo", GraphCleaningFunctions.cleanDate(input.date))) List(d, createRelation(input.pmid, "pmid", d.getId, collectedFromMap("ebi"), ModelConstants.RELATIONSHIP, ModelConstants.IS_RELATED_TO, GraphCleaningFunctions.cleanDate(input.date)))
} }
} }

Some files were not shown because too many files have changed in this diff Show More